diff options
Diffstat (limited to 'net')
573 files changed, 27268 insertions, 12502 deletions
diff --git a/net/802/Kconfig b/net/802/Kconfig index be33d27..80d4bf7 100644 --- a/net/802/Kconfig +++ b/net/802/Kconfig @@ -5,3 +5,6 @@ config STP config GARP tristate select STP + +config MRP + tristate diff --git a/net/802/Makefile b/net/802/Makefile index a30d6e3..37e654d 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o obj-$(CONFIG_STP) += stp.o obj-$(CONFIG_GARP) += garp.o +obj-$(CONFIG_MRP) += mrp.o diff --git a/net/802/mrp.c b/net/802/mrp.c new file mode 100644 index 0000000..e085bcc --- /dev/null +++ b/net/802/mrp.c @@ -0,0 +1,899 @@ +/* + * IEEE 802.1Q Multiple Registration Protocol (MRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/802/garp.c + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. 
+ */ +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <net/mrp.h> +#include <asm/unaligned.h> + +static unsigned int mrp_join_time __read_mostly = 200; +module_param(mrp_join_time, uint, 0644); +MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); +MODULE_LICENSE("GPL"); + +static const u8 +mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = { + [MRP_APPLICANT_VO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VO, + }, + [MRP_APPLICANT_VP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_AA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VP, + }, + [MRP_APPLICANT_VN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_AN, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_MT] = 
MRP_APPLICANT_VN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VN, + }, + [MRP_APPLICANT_AN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AN, + }, + [MRP_APPLICANT_AA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_QA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_LA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + 
[MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LA] = MRP_APPLICANT_LA, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_LA, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_LA, + }, + [MRP_APPLICANT_AO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_AO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AO, + }, + [MRP_APPLICANT_QO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_QO, + }, + [MRP_APPLICANT_AP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = 
MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, + [MRP_APPLICANT_QP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QP, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, +}; + +static const u8 +mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = { + [MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL, + [MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV, + [MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL, +}; + +static void mrp_attrvalue_inc(void *value, u8 len) +{ + u8 *v = (u8 *)value; + + /* Add 1 to the last byte. If it becomes zero, + * go to the previous byte and repeat. 
+ */ + while (len > 0 && !++v[--len]) + ; +} + +static int mrp_attr_cmp(const struct mrp_attr *attr, + const void *value, u8 len, u8 type) +{ + if (attr->type != type) + return attr->type - type; + if (attr->len != len) + return attr->len - len; + return memcmp(attr->value, value, len); +} + +static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (parent) { + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + parent = parent->rb_left; + else if (d < 0) + parent = parent->rb_right; + else + return attr; + } + return NULL; +} + +static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = NULL, **p = &app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (*p) { + parent = *p; + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + p = &parent->rb_left; + else if (d < 0) + p = &parent->rb_right; + else { + /* The attribute already exists; re-use it. 
*/ + return attr; + } + } + attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC); + if (!attr) + return attr; + attr->state = MRP_APPLICANT_VO; + attr->type = type; + attr->len = len; + memcpy(attr->value, value, len); + + rb_link_node(&attr->node, parent, p); + rb_insert_color(&attr->node, &app->mad); + return attr; +} + +static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr) +{ + rb_erase(&attr->node, &app->mad); + kfree(attr); +} + +static int mrp_pdu_init(struct mrp_applicant *app) +{ + struct sk_buff *skb; + struct mrp_pdu_hdr *ph; + + skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev), + GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb->dev = app->dev; + skb->protocol = app->app->pkttype.type; + skb_reserve(skb, LL_RESERVED_SPACE(app->dev)); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph)); + ph->version = app->app->version; + + app->pdu = skb; + return 0; +} + +static int mrp_pdu_append_end_mark(struct mrp_applicant *app) +{ + __be16 *endmark; + + if (skb_tailroom(app->pdu) < sizeof(*endmark)) + return -1; + endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark)); + put_unaligned(MRP_END_MARK, endmark); + return 0; +} + +static void mrp_pdu_queue(struct mrp_applicant *app) +{ + if (!app->pdu) + return; + + if (mrp_cb(app->pdu)->mh) + mrp_pdu_append_end_mark(app); + mrp_pdu_append_end_mark(app); + + dev_hard_header(app->pdu, app->dev, ntohs(app->app->pkttype.type), + app->app->group_address, app->dev->dev_addr, + app->pdu->len); + + skb_queue_tail(&app->queue, app->pdu); + app->pdu = NULL; +} + +static void mrp_queue_xmit(struct mrp_applicant *app) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&app->queue))) + dev_queue_xmit(skb); +} + +static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app, + u8 attrtype, u8 attrlen) +{ + struct mrp_msg_hdr *mh; + + if (mrp_cb(app->pdu)->mh) { + if (mrp_pdu_append_end_mark(app) < 0) + return -1; + 
mrp_cb(app->pdu)->mh = NULL; + mrp_cb(app->pdu)->vah = NULL; + } + + if (skb_tailroom(app->pdu) < sizeof(*mh)) + return -1; + mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh)); + mh->attrtype = attrtype; + mh->attrlen = attrlen; + mrp_cb(app->pdu)->mh = mh; + return 0; +} + +static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app, + const void *firstattrvalue, u8 attrlen) +{ + struct mrp_vecattr_hdr *vah; + + if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen) + return -1; + vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu, + sizeof(*vah) + attrlen); + put_unaligned(0, &vah->lenflags); + memcpy(vah->firstattrvalue, firstattrvalue, attrlen); + mrp_cb(app->pdu)->vah = vah; + memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen); + return 0; +} + +static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app, + const struct mrp_attr *attr, + enum mrp_vecattr_event vaevent) +{ + u16 len, pos; + u8 *vaevents; + int err; +again: + if (!app->pdu) { + err = mrp_pdu_init(app); + if (err < 0) + return err; + } + + /* If there is no Message header in the PDU, or the Message header is + * for a different attribute type, add an EndMark (if necessary) and a + * new Message header to the PDU. + */ + if (!mrp_cb(app->pdu)->mh || + mrp_cb(app->pdu)->mh->attrtype != attr->type || + mrp_cb(app->pdu)->mh->attrlen != attr->len) { + if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0) + goto queue; + } + + /* If there is no VectorAttribute header for this Message in the PDU, + * or this attribute's value does not sequentially follow the previous + * attribute's value, add a new VectorAttribute header to the PDU. + */ + if (!mrp_cb(app->pdu)->vah || + memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) { + if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0) + goto queue; + } + + len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags)); + pos = len % 3; + + /* Events are packed into Vectors in the PDU, three to a byte. 
Add a + * byte to the end of the Vector if necessary. + */ + if (!pos) { + if (skb_tailroom(app->pdu) < sizeof(u8)) + goto queue; + vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8)); + } else { + vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8)); + } + + switch (pos) { + case 0: + *vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + break; + case 1: + *vaevents += vaevent * __MRP_VECATTR_EVENT_MAX; + break; + case 2: + *vaevents += vaevent; + break; + default: + WARN_ON(1); + } + + /* Increment the length of the VectorAttribute in the PDU, as well as + * the value of the next attribute that would continue its Vector. + */ + put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags); + mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len); + + return 0; + +queue: + mrp_pdu_queue(app); + goto again; +} + +static void mrp_attr_event(struct mrp_applicant *app, + struct mrp_attr *attr, enum mrp_event event) +{ + enum mrp_applicant_state state; + + state = mrp_applicant_state_table[attr->state][event]; + if (state == MRP_APPLICANT_INVALID) { + WARN_ON(1); + return; + } + + if (event == MRP_EVENT_TX) { + /* When appending the attribute fails, don't update its state + * in order to retry at the next TX event. + */ + + switch (mrp_tx_action_table[attr->state]) { + case MRP_TX_ACTION_NONE: + case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL: + case MRP_TX_ACTION_S_IN_OPTIONAL: + break; + case MRP_TX_ACTION_S_NEW: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_NEW) < 0) + return; + break; + case MRP_TX_ACTION_S_JOIN_IN: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_JOIN_IN) < 0) + return; + break; + case MRP_TX_ACTION_S_LV: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_LV) < 0) + return; + /* As a pure applicant, sending a leave message + * implies that the attribute was unregistered and + * can be destroyed. 
+ */ + mrp_attr_destroy(app, attr); + return; + default: + WARN_ON(1); + } + } + + attr->state = state; +} + +int mrp_request_join(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return -ENOMEM; + + spin_lock_bh(&app->lock); + attr = mrp_attr_create(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return -ENOMEM; + } + mrp_attr_event(app, attr, MRP_EVENT_JOIN); + spin_unlock_bh(&app->lock); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_request_join); + +void mrp_request_leave(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return; + + spin_lock_bh(&app->lock); + attr = mrp_attr_lookup(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return; + } + mrp_attr_event(app, attr, MRP_EVENT_LV); + spin_unlock_bh(&app->lock); +} +EXPORT_SYMBOL_GPL(mrp_request_leave); + +static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event) +{ + struct rb_node *node, *next; + struct mrp_attr *attr; + + for (node = rb_first(&app->mad); + next = node ? 
rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct mrp_attr, node); + mrp_attr_event(app, attr, event); + } +} + +static void mrp_join_timer_arm(struct mrp_applicant *app) +{ + unsigned long delay; + + delay = (u64)msecs_to_jiffies(mrp_join_time) * net_random() >> 32; + mod_timer(&app->join_timer, jiffies + delay); +} + +static void mrp_join_timer(unsigned long data) +{ + struct mrp_applicant *app = (struct mrp_applicant *)data; + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_queue_xmit(app); + mrp_join_timer_arm(app); +} + +static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) +{ + __be16 endmark; + + if (skb_copy_bits(skb, *offset, &endmark, sizeof(endmark)) < 0) + return -1; + if (endmark == MRP_END_MARK) { + *offset += sizeof(endmark); + return -1; + } + return 0; +} + +static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app, + struct sk_buff *skb, + enum mrp_vecattr_event vaevent) +{ + struct mrp_attr *attr; + enum mrp_event event; + + attr = mrp_attr_lookup(app, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen, + mrp_cb(skb)->mh->attrtype); + if (attr == NULL) + return; + + switch (vaevent) { + case MRP_VECATTR_EVENT_NEW: + event = MRP_EVENT_R_NEW; + break; + case MRP_VECATTR_EVENT_JOIN_IN: + event = MRP_EVENT_R_JOIN_IN; + break; + case MRP_VECATTR_EVENT_IN: + event = MRP_EVENT_R_IN; + break; + case MRP_VECATTR_EVENT_JOIN_MT: + event = MRP_EVENT_R_JOIN_MT; + break; + case MRP_VECATTR_EVENT_MT: + event = MRP_EVENT_R_MT; + break; + case MRP_VECATTR_EVENT_LV: + event = MRP_EVENT_R_LV; + break; + default: + return; + } + + mrp_attr_event(app, attr, event); +} + +static int mrp_pdu_parse_vecattr(struct mrp_applicant *app, + struct sk_buff *skb, int *offset) +{ + struct mrp_vecattr_hdr _vah; + u16 valen; + u8 vaevents, vaevent; + + mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, sizeof(_vah), + &_vah); + if (!mrp_cb(skb)->vah) + 
return -1; + *offset += sizeof(_vah); + + if (get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_FLAG_LA) + mrp_mad_event(app, MRP_EVENT_R_LA); + valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_LEN_MASK); + + /* The VectorAttribute structure in a PDU carries event information + * about one or more attributes having consecutive values. Only the + * value for the first attribute is contained in the structure. So + * we make a copy of that value, and then increment it each time we + * advance to the next event in its Vector. + */ + if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen > + FIELD_SIZEOF(struct sk_buff, cb)) + return -1; + if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen) < 0) + return -1; + *offset += mrp_cb(skb)->mh->attrlen; + + /* In a VectorAttribute, the Vector contains events which are packed + * three to a byte. We process one byte of the Vector at a time. + */ + while (valen > 0) { + if (skb_copy_bits(skb, *offset, &vaevents, + sizeof(vaevents)) < 0) + return -1; + *offset += sizeof(vaevents); + + /* Extract and process the first event. */ + vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + if (vaevent >= __MRP_VECATTR_EVENT_MAX) { + /* The byte is malformed; stop processing. */ + return -1; + } + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the second event. */ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + vaevent = vaevents / __MRP_VECATTR_EVENT_MAX; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the third event. 
*/ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= __MRP_VECATTR_EVENT_MAX; + vaevent = vaevents; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + } + return 0; +} + +static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb, + int *offset) +{ + struct mrp_msg_hdr _mh; + + mrp_cb(skb)->mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh); + if (!mrp_cb(skb)->mh) + return -1; + *offset += sizeof(_mh); + + if (mrp_cb(skb)->mh->attrtype == 0 || + mrp_cb(skb)->mh->attrtype > app->app->maxattr || + mrp_cb(skb)->mh->attrlen == 0) + return -1; + + while (skb->len > *offset) { + if (mrp_pdu_parse_end_mark(skb, offset) < 0) + break; + if (mrp_pdu_parse_vecattr(app, skb, offset) < 0) + return -1; + } + return 0; +} + +static int mrp_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct mrp_application *appl = container_of(pt, struct mrp_application, + pkttype); + struct mrp_port *port; + struct mrp_applicant *app; + struct mrp_pdu_hdr _ph; + const struct mrp_pdu_hdr *ph; + int offset = skb_network_offset(skb); + + /* If the interface is in promiscuous mode, drop the packet if + * it was unicast to another host. 
+ */ + if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) + goto out; + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto out; + port = rcu_dereference(dev->mrp_port); + if (unlikely(!port)) + goto out; + app = rcu_dereference(port->applicants[appl->type]); + if (unlikely(!app)) + goto out; + + ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph); + if (!ph) + goto out; + offset += sizeof(_ph); + + if (ph->version != app->app->version) + goto out; + + spin_lock(&app->lock); + while (skb->len > offset) { + if (mrp_pdu_parse_end_mark(skb, &offset) < 0) + break; + if (mrp_pdu_parse_msg(app, skb, &offset) < 0) + break; + } + spin_unlock(&app->lock); +out: + kfree_skb(skb); + return 0; +} + +static int mrp_init_port(struct net_device *dev) +{ + struct mrp_port *port; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + rcu_assign_pointer(dev->mrp_port, port); + return 0; +} + +static void mrp_release_port(struct net_device *dev) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + unsigned int i; + + for (i = 0; i <= MRP_APPLICATION_MAX; i++) { + if (rtnl_dereference(port->applicants[i])) + return; + } + RCU_INIT_POINTER(dev->mrp_port, NULL); + kfree_rcu(port, rcu); +} + +int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_applicant *app; + int err; + + ASSERT_RTNL(); + + if (!rtnl_dereference(dev->mrp_port)) { + err = mrp_init_port(dev); + if (err < 0) + goto err1; + } + + err = -ENOMEM; + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + goto err2; + + err = dev_mc_add(dev, appl->group_address); + if (err < 0) + goto err3; + + app->dev = dev; + app->app = appl; + app->mad = RB_ROOT; + spin_lock_init(&app->lock); + skb_queue_head_init(&app->queue); + rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); + setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); + mrp_join_timer_arm(app); + return 0; + +err3: + kfree(app); +err2: + 
mrp_release_port(dev); +err1: + return err; +} +EXPORT_SYMBOL_GPL(mrp_init_applicant); + +void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + + ASSERT_RTNL(); + + RCU_INIT_POINTER(port->applicants[appl->type], NULL); + + /* Delete timer and generate a final TX event to flush out + * all pending messages before the applicant is gone. + */ + del_timer_sync(&app->join_timer); + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_queue_xmit(app); + + dev_mc_del(dev, appl->group_address); + kfree_rcu(app, rcu); + mrp_release_port(dev); +} +EXPORT_SYMBOL_GPL(mrp_uninit_applicant); + +int mrp_register_application(struct mrp_application *appl) +{ + appl->pkttype.func = mrp_rcv; + dev_add_pack(&appl->pkttype); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_register_application); + +void mrp_unregister_application(struct mrp_application *appl) +{ + dev_remove_pack(&appl->pkttype); +} +EXPORT_SYMBOL_GPL(mrp_unregister_application); diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index fa073a5..8f7517d 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -27,3 +27,14 @@ config VLAN_8021Q_GVRP automatic propagation of registered VLANs to switches. If unsure, say N. + +config VLAN_8021Q_MVRP + bool "MVRP (Multiple VLAN Registration Protocol) support" + depends on VLAN_8021Q + select MRP + help + Select this to enable MVRP end-system support. MVRP is used for + automatic propagation of registered VLANs to switches; it + supersedes GVRP and is not backwards-compatible. + + If unsure, say N. 
diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 9f4f174..7bc8db0 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -6,5 +6,6 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o 8021q-y := vlan.o vlan_dev.o vlan_netlink.o 8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_VLAN_8021Q_MVRP) += vlan_mvrp.o 8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a292e80..85addcd 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -86,15 +86,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp = &vlan_info->grp; - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan_id); - grp->nr_vlan_devs--; + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_leave(dev); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); @@ -105,8 +100,19 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) */ unregister_netdevice_queue(dev, head); - if (grp->nr_vlan_devs == 0) + netdev_upper_dev_unlink(real_dev, dev); + + if (grp->nr_vlan_devs == 0) { + vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); + } + + /* Take it out of our own structures, but be sure to interlock with + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). 
+ */ + if (vlan_id) + vlan_vid_del(real_dev, vlan_id); /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); @@ -115,19 +121,12 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) { const char *name = real_dev->name; - const struct net_device_ops *ops = real_dev->netdev_ops; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { pr_info("VLANs not supported on %s\n", name); return -EOPNOTSUPP; } - if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && - (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) { - pr_info("Device %s has buggy VLAN hw accel\n", name); - return -EOPNOTSUPP; - } - if (vlan_find_dev(real_dev, vlan_id) != NULL) return -EEXIST; @@ -156,15 +155,22 @@ int register_vlan_dev(struct net_device *dev) err = vlan_gvrp_init_applicant(real_dev); if (err < 0) goto out_vid_del; + err = vlan_mvrp_init_applicant(real_dev); + if (err < 0) + goto out_uninit_gvrp; } err = vlan_group_prealloc_vid(grp, vlan_id); if (err < 0) - goto out_uninit_applicant; + goto out_uninit_mvrp; + + err = netdev_upper_dev_link(real_dev, dev); + if (err) + goto out_uninit_mvrp; err = register_netdevice(dev); if (err < 0) - goto out_uninit_applicant; + goto out_upper_dev_unlink; /* Account for reference in struct vlan_dev_priv */ dev_hold(real_dev); @@ -180,7 +186,12 @@ int register_vlan_dev(struct net_device *dev) return 0; -out_uninit_applicant: +out_upper_dev_unlink: + netdev_upper_dev_unlink(real_dev, dev); +out_uninit_mvrp: + if (grp->nr_vlan_devs == 0) + vlan_mvrp_uninit_applicant(real_dev); +out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: @@ -654,13 +665,19 @@ static int __init vlan_proto_init(void) if (err < 0) goto err3; - err = vlan_netlink_init(); + err = vlan_mvrp_init(); if (err < 0) goto err4; + err = vlan_netlink_init(); + if (err < 0) + goto err5; + vlan_ioctl_set(vlan_ioctl_handler); return 0; +err5: + 
vlan_mvrp_uninit(); err4: vlan_gvrp_uninit(); err3: @@ -681,6 +698,7 @@ static void __exit vlan_cleanup_module(void) unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ + vlan_mvrp_uninit(); vlan_gvrp_uninit(); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index a4886d9..670f1e8 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -171,6 +171,22 @@ static inline int vlan_gvrp_init(void) { return 0; } static inline void vlan_gvrp_uninit(void) {} #endif +#ifdef CONFIG_VLAN_8021Q_MVRP +extern int vlan_mvrp_request_join(const struct net_device *dev); +extern void vlan_mvrp_request_leave(const struct net_device *dev); +extern int vlan_mvrp_init_applicant(struct net_device *dev); +extern void vlan_mvrp_uninit_applicant(struct net_device *dev); +extern int vlan_mvrp_init(void); +extern void vlan_mvrp_uninit(void); +#else +static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_mvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_mvrp_init(void) { return 0; } +static inline void vlan_mvrp_uninit(void) {} +#endif + extern const char vlan_fullname[]; extern const char vlan_version[]; extern int vlan_netlink_init(void); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 65e06ab..f3b6f51 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -60,21 +60,25 @@ bool vlan_do_receive(struct sk_buff **skbp) return true; } -/* Must be invoked with rcu_read_lock or with RTNL. */ -struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, +/* Must be invoked with rcu_read_lock. 
*/ +struct net_device *__vlan_find_dev_deep(struct net_device *dev, u16 vlan_id) { - struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info); + struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); if (vlan_info) { return vlan_group_get_device(&vlan_info->grp, vlan_id); } else { /* - * Bonding slaves do not have grp assigned to themselves. - * Grp is assigned to bonding master instead. + * Lower devices of master uppers (bonding, team) do not have + * grp assigned to themselves. Grp is assigned to upper device + * instead. */ - if (netif_is_bond_slave(real_dev)) - return __vlan_find_dev_deep(real_dev->master, vlan_id); + struct net_device *upper_dev; + + upper_dev = netdev_master_upper_dev_get_rcu(dev); + if (upper_dev) + return __vlan_find_dev_deep(upper_dev, vlan_id); } return NULL; @@ -140,6 +144,7 @@ err_free: kfree_skb(skb); return NULL; } +EXPORT_SYMBOL(vlan_untag); /* @@ -220,8 +225,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, if (!vid_info) return -ENOMEM; - if ((dev->features & NETIF_F_HW_VLAN_FILTER) && - ops->ndo_vlan_rx_add_vid) { + if (dev->features & NETIF_F_HW_VLAN_FILTER) { err = ops->ndo_vlan_rx_add_vid(dev, vid); if (err) { kfree(vid_info); @@ -278,8 +282,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, unsigned short vid = vid_info->vid; int err; - if ((dev->features & NETIF_F_HW_VLAN_FILTER) && - ops->ndo_vlan_rx_kill_vid) { + if (dev->features & NETIF_F_HW_VLAN_FILTER) { err = ops->ndo_vlan_rx_kill_vid(dev, vid); if (err) { pr_warn("failed to kill vid %d for device %s\n", diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4a6d31a..19cf81b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -261,7 +261,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) u32 old_flags = vlan->flags; if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; 
vlan->flags = (old_flags & ~mask) | (flags & mask); @@ -272,6 +272,13 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) else vlan_gvrp_request_leave(dev); } + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) { + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + else + vlan_mvrp_request_leave(dev); + } return 0; } @@ -312,6 +319,9 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; @@ -640,9 +650,9 @@ static int vlan_ethtool_get_settings(struct net_device *dev, static void vlan_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, vlan_fullname); - strcpy(info->version, vlan_version); - strcpy(info->fw_version, "N/A"); + strlcpy(info->driver, vlan_fullname, sizeof(info->driver)); + strlcpy(info->version, vlan_version, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); } static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) @@ -723,7 +733,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) vlan->netpoll = NULL; - __netpoll_free_rcu(netpoll); + __netpoll_free_async(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c new file mode 100644 index 0000000..d9ec1d5 --- /dev/null +++ b/net/8021q/vlan_mvrp.c @@ -0,0 +1,72 @@ +/* + * IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/8021q/vlan_gvrp.c + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as 
published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/mrp.h> +#include "vlan.h" + +#define MRP_MVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum mvrp_attributes { + MVRP_ATTR_INVALID, + MVRP_ATTR_VID, + __MVRP_ATTR_MAX +}; +#define MVRP_ATTR_MAX (__MVRP_ATTR_MAX - 1) + +static struct mrp_application vlan_mrp_app __read_mostly = { + .type = MRP_APPLICATION_MVRP, + .maxattr = MVRP_ATTR_MAX, + .pkttype.type = htons(ETH_P_MVRP), + .group_address = MRP_MVRP_ADDRESS, + .version = 0, +}; + +int vlan_mvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + return mrp_request_join(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +void vlan_mvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + mrp_request_leave(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +int vlan_mvrp_init_applicant(struct net_device *dev) +{ + return mrp_init_applicant(dev, &vlan_mrp_app); +} + +void vlan_mvrp_uninit_applicant(struct net_device *dev) +{ + mrp_uninit_applicant(dev, &vlan_mrp_app); +} + +int __init vlan_mvrp_init(void) +{ + return mrp_register_application(&vlan_mrp_app); +} + +void vlan_mvrp_uninit(void) +{ + mrp_unregister_application(&vlan_mrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 708c80e..1789658 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -62,7 +62,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; } diff --git 
a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 4de77ea..dc526ec 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -131,7 +131,7 @@ void vlan_proc_cleanup(struct net *net) remove_proc_entry(name_conf, vn->proc_vlan_dir); if (vn->proc_vlan_dir) - proc_net_remove(net, name_root); + remove_proc_entry(name_root, net->proc_net); /* Dynamically added entries should be cleaned up as their vlan_device * is removed, so we should not have to take care of it here... diff --git a/net/9p/Kconfig b/net/9p/Kconfig index d9ea09b..a75174a 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -23,7 +23,7 @@ config NET_9P_VIRTIO guest partitions and a host partition. config NET_9P_RDMA - depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL + depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS tristate "9P RDMA Transport (Experimental)" help This builds support for an RDMA transport. diff --git a/net/9p/client.c b/net/9p/client.c index 34d4176..8eb7542 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1100,7 +1100,7 @@ void p9_client_begin_disconnect(struct p9_client *clnt) EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, u32 n_uname, char *aname) + char *uname, kuid_t n_uname, char *aname) { int err = 0; struct p9_req_t *req; @@ -1117,7 +1117,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid, + req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid, afid ? 
afid->fid : P9_NOFID, uname, aname, n_uname); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1270,7 +1270,7 @@ error: EXPORT_SYMBOL(p9_client_open); int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, - gid_t gid, struct p9_qid *qid) + kgid_t gid, struct p9_qid *qid) { int err = 0; struct p9_client *clnt; @@ -1279,13 +1279,14 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, p9_debug(P9_DEBUG_9P, ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", - ofid->fid, name, flags, mode, gid); + ofid->fid, name, flags, mode, + from_kgid(&init_user_ns, gid)); clnt = ofid->clnt; if (ofid->mode != -1) return -EINVAL; - req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags, + req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags, mode, gid); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1358,7 +1359,7 @@ error: } EXPORT_SYMBOL(p9_client_fcreate); -int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, +int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, struct p9_qid *qid) { int err = 0; @@ -1369,7 +1370,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, dfid->fid, name, symtgt); clnt = dfid->clnt; - req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt, + req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt, gid); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1710,7 +1711,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, ret->atime, ret->mtime, (unsigned long long)ret->length, ret->name, ret->uid, ret->gid, ret->muid, ret->extension, - ret->n_uid, ret->n_gid, ret->n_muid); + from_kuid(&init_user_ns, ret->n_uid), + from_kgid(&init_user_ns, ret->n_gid), + from_kuid(&init_user_ns, ret->n_muid)); p9_free_req(clnt, req); return ret; @@ -1764,8 +1767,10 @@ struct p9_stat_dotl 
*p9_client_getattr_dotl(struct p9_fid *fid, "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" "<<< st_gen=%lld st_data_version=%lld", ret->st_result_mask, ret->qid.type, ret->qid.path, - ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid, - ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize, + ret->qid.version, ret->st_mode, ret->st_nlink, + from_kuid(&init_user_ns, ret->st_uid), + from_kgid(&init_user_ns, ret->st_gid), + ret->st_rdev, ret->st_size, ret->st_blksize, ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, @@ -1828,7 +1833,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, wst->atime, wst->mtime, (unsigned long long)wst->length, wst->name, wst->uid, wst->gid, wst->muid, wst->extension, - wst->n_uid, wst->n_gid, wst->n_muid); + from_kuid(&init_user_ns, wst->n_uid), + from_kgid(&init_user_ns, wst->n_gid), + from_kuid(&init_user_ns, wst->n_muid)); req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst); if (IS_ERR(req)) { @@ -1857,7 +1864,9 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) " valid=%x mode=%x uid=%d gid=%d size=%lld\n" " atime_sec=%lld atime_nsec=%lld\n" " mtime_sec=%lld mtime_nsec=%lld\n", - p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, + p9attr->valid, p9attr->mode, + from_kuid(&init_user_ns, p9attr->uid), + from_kgid(&init_user_ns, p9attr->gid), p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, p9attr->mtime_sec, p9attr->mtime_nsec); @@ -2106,7 +2115,7 @@ error: EXPORT_SYMBOL(p9_client_readdir); int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, - dev_t rdev, gid_t gid, struct p9_qid *qid) + dev_t rdev, kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -2116,7 +2125,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, clnt = 
fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); - req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, + req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev), gid); if (IS_ERR(req)) return PTR_ERR(req); @@ -2137,7 +2146,7 @@ error: EXPORT_SYMBOL(p9_client_mknod_dotl); int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, - gid_t gid, struct p9_qid *qid) + kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -2146,8 +2155,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = 0; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", - fid->fid, name, mode, gid); - req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, + fid->fid, name, mode, from_kgid(&init_user_ns, gid)); + req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode, gid); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/net/9p/error.c b/net/9p/error.c index 2ab2de7..126fd0d 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -221,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init); int p9_errstr2errno(char *errstr, int len) { int errno; - struct hlist_node *p; struct errormap *c; int bucket; errno = 0; - p = NULL; c = NULL; bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + hlist_for_each_entry(c, &hash_errmap[bucket], list) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 3d33ecf..ab9127e 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -85,6 +85,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) d - int32_t q - int64_t s - string + u - numeric uid + g - numeric gid S - stat Q - qid D - data blob (int32_t size followed by void *, results are not freed) @@ -163,6 +165,26 @@ 
p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, (*sptr)[len] = 0; } break; + case 'u': { + kuid_t *uid = va_arg(ap, kuid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *uid = make_kuid(&init_user_ns, + le32_to_cpu(le_val)); + } break; + case 'g': { + kgid_t *gid = va_arg(ap, kgid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *gid = make_kgid(&init_user_ns, + le32_to_cpu(le_val)); + } break; case 'Q':{ struct p9_qid *qid = va_arg(ap, struct p9_qid *); @@ -177,11 +199,12 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, va_arg(ap, struct p9_wstat *); memset(stbuf, 0, sizeof(struct p9_wstat)); - stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = - -1; + stbuf->n_uid = stbuf->n_muid = INVALID_UID; + stbuf->n_gid = INVALID_GID; + errcode = p9pdu_readf(pdu, proto_version, - "wwdQdddqssss?sddd", + "wwdQdddqssss?sugu", &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, &stbuf->mode, &stbuf->atime, @@ -294,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, memset(stbuf, 0, sizeof(struct p9_stat_dotl)); errcode = p9pdu_readf(pdu, proto_version, - "qQdddqqqqqqqqqqqqqqq", + "qQdugqqqqqqqqqqqqqqq", &stbuf->st_result_mask, &stbuf->qid, &stbuf->st_mode, @@ -377,6 +400,20 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; + case 'u': { + kuid_t uid = va_arg(ap, kuid_t); + __le32 val = cpu_to_le32( + from_kuid(&init_user_ns, uid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; + case 'g': { + kgid_t gid = va_arg(ap, kgid_t); + __le32 val = cpu_to_le32( + from_kgid(&init_user_ns, gid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; case 'Q':{ const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); @@ -390,7 +427,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const 
char *fmt, va_arg(ap, const struct p9_wstat *); errcode = p9pdu_writef(pdu, proto_version, - "wwdQdddqssss?sddd", + "wwdQdddqssss?sugu", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, stbuf->mode, stbuf->atime, @@ -468,7 +505,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, struct p9_iattr_dotl *); errcode = p9pdu_writef(pdu, proto_version, - "ddddqqqqq", + "ddugqqqqq", p9attr->valid, p9attr->mode, p9attr->uid, diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index fd05c81..de2e950 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -87,7 +87,7 @@ struct virtio_chan { /* This is global limit. Since we don't have a global structure, * will be placing it in each channel. */ - int p9_max_pages; + unsigned long p9_max_pages; /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; diff --git a/net/9p/util.c b/net/9p/util.c index 6ceeeb3..59f278e 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -87,23 +87,18 @@ EXPORT_SYMBOL(p9_idpool_destroy); int p9_idpool_get(struct p9_idpool *p) { - int i = 0; - int error; + int i; unsigned long flags; -retry: - if (idr_pre_get(&p->pool, GFP_NOFS) == 0) - return -1; - + idr_preload(GFP_NOFS); spin_lock_irqsave(&p->lock, flags); /* no need to store exactly p, we just need something non-null */ - error = idr_get_new(&p->pool, p, &i); - spin_unlock_irqrestore(&p->lock, flags); + i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT); - if (error == -EAGAIN) - goto retry; - else if (error) + spin_unlock_irqrestore(&p->lock, flags); + idr_preload_end(); + if (i < 0) return -1; p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p); diff --git a/net/Kconfig b/net/Kconfig index 30b48f5..6f676ab 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -90,7 +90,6 @@ config NETWORK_SECMARK config NETWORK_PHY_TIMESTAMPING bool "Timestamping in PHY devices" - depends on EXPERIMENTAL help This allows timestamping of network packets by PHYs with hardware timestamping capabilities. 
This option adds some @@ -209,7 +208,6 @@ source "net/ipx/Kconfig" source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" -source "net/wanrouter/Kconfig" source "net/phonet/Kconfig" source "net/ieee802154/Kconfig" source "net/mac802154/Kconfig" @@ -218,6 +216,7 @@ source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" +source "net/vmw_vsock/Kconfig" config RPS boolean @@ -232,7 +231,7 @@ config RFS_ACCEL config XPS boolean - depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + depends on SMP && USE_GENERIC_SMP_HELPERS default y config NETPRIO_CGROUP @@ -278,7 +277,7 @@ config NET_PKTGEN config NET_TCPPROBE tristate "TCP connection probing" - depends on INET && EXPERIMENTAL && PROC_FS && KPROBES + depends on INET && PROC_FS && KPROBES ---help--- This module allows for capturing the changes to TCP connection state in response to incoming packets. It is used for debugging @@ -295,7 +294,7 @@ config NET_TCPPROBE config NET_DROP_MONITOR tristate "Network packet drop alerting service" - depends on INET && EXPERIMENTAL && TRACEPOINTS + depends on INET && TRACEPOINTS ---help--- This feature provides an alerting service to userspace in the event that packets are discarded in the network stack. 
Alerts diff --git a/net/Makefile b/net/Makefile index 4f4ee08..091e7b04 100644 --- a/net/Makefile +++ b/net/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_BRIDGE) += bridge/ obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK) += appletalk/ -obj-$(CONFIG_WAN_ROUTER) += wanrouter/ obj-$(CONFIG_X25) += x25/ obj-$(CONFIG_LAPB) += lapb/ obj-$(CONFIG_NETROM) += netrom/ @@ -70,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ +obj-$(CONFIG_VSOCKETS) += vmw_vsock/ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 3347529..4a141e3 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -93,10 +93,9 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, struct atalk_iface *atif) { struct sock *s; - struct hlist_node *node; read_lock_bh(&atalk_sockets_lock); - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (to->sat_port != at->src_port) @@ -141,11 +140,10 @@ static struct sock *atalk_find_or_insert_socket(struct sock *sk, struct sockaddr_at *sat) { struct sock *s; - struct hlist_node *node; struct atalk_sock *at; write_lock_bh(&atalk_sockets_lock); - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && @@ -1084,9 +1082,8 @@ static int atalk_pick_and_bind_port(struct sock *sk, struct sockaddr_at *sat) sat->sat_port < ATPORT_LAST; sat->sat_port++) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && diff --git a/net/atm/common.c b/net/atm/common.c index 806fc0a..737bef5 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -270,11 +270,11 @@ void atm_dev_release_vccs(struct atm_dev *dev) write_lock_irq(&vcc_sklist_lock); for (i = 0; i < 
VCC_HTABLE_SIZE; i++) { struct hlist_head *head = &vcc_hash[i]; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct sock *s; struct atm_vcc *vcc; - sk_for_each_safe(s, node, tmp, head) { + sk_for_each_safe(s, tmp, head) { vcc = atm_sk(s); if (vcc->dev == dev) { vcc_release_async(vcc, -EPIPE); @@ -317,11 +317,10 @@ static int adjust_tp(struct atm_trafprm *tp, unsigned char aal) static int check_ci(const struct atm_vcc *vcc, short vpi, int vci) { struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)]; - struct hlist_node *node; struct sock *s; struct atm_vcc *walk; - sk_for_each(s, node, head) { + sk_for_each(s, head) { walk = atm_sk(s); if (walk->dev != vcc->dev) continue; @@ -532,6 +531,8 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; + msg->msg_namelen = 0; + if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/atm/lec.c b/net/atm/lec.c index 2e3d942..f23916b 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -842,7 +842,9 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl, --*l; } - hlist_for_each_entry_from(tmp, e, next) { + tmp = container_of(e, struct lec_arp_table, next); + + hlist_for_each_entry_from(tmp, next) { if (--*l < 0) break; } @@ -1307,7 +1309,6 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) static int lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) { - struct hlist_node *node; struct lec_arp_table *entry; int i, remove_vcc = 1; @@ -1326,7 +1327,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT */ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (memcmp(to_remove->atm_addr, entry->atm_addr, ATM_ESA_LEN) == 0) { @@ -1364,14 +1365,13 @@ static const char *get_status_string(unsigned char st) static void 
dump_arp_table(struct lec_priv *priv) { - struct hlist_node *node; struct lec_arp_table *rulla; char buf[256]; int i, j, offset; pr_info("Dump %p:\n", priv); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(rulla, node, + hlist_for_each_entry(rulla, &priv->lec_arp_tables[i], next) { offset = 0; offset += sprintf(buf, "%d: %p\n", i, rulla); @@ -1403,7 +1403,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->lec_no_forward)) pr_info("No forward\n"); - hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) { + hlist_for_each_entry(rulla, &priv->lec_no_forward, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1428,7 +1428,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->lec_arp_empty_ones)) pr_info("Empty ones\n"); - hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) { + hlist_for_each_entry(rulla, &priv->lec_arp_empty_ones, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1453,7 +1453,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->mcast_fwds)) pr_info("Multicast Forward VCCs\n"); - hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) { + hlist_for_each_entry(rulla, &priv->mcast_fwds, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1487,7 +1487,7 @@ static void dump_arp_table(struct lec_priv *priv) static void lec_arp_destroy(struct lec_priv *priv) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; @@ -1499,7 +1499,7 @@ static void lec_arp_destroy(struct lec_priv *priv) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + 
hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { lec_arp_remove(priv, entry); lec_arp_put(entry); @@ -1507,7 +1507,7 @@ static void lec_arp_destroy(struct lec_priv *priv) INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); @@ -1516,7 +1516,7 @@ static void lec_arp_destroy(struct lec_priv *priv) } INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_no_forward, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); @@ -1525,7 +1525,7 @@ static void lec_arp_destroy(struct lec_priv *priv) } INIT_HLIST_HEAD(&priv->lec_no_forward); - hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ lec_arp_clear_vccs(entry); hlist_del(&entry->next); @@ -1542,14 +1542,13 @@ static void lec_arp_destroy(struct lec_priv *priv) static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, const unsigned char *mac_addr) { - struct hlist_node *node; struct hlist_head *head; struct lec_arp_table *entry; pr_debug("%pM\n", mac_addr); head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])]; - hlist_for_each_entry(entry, node, head, next) { + hlist_for_each_entry(entry, head, next) { if (ether_addr_equal(mac_addr, entry->mac_addr)) return entry; } @@ -1686,7 +1685,7 @@ static void lec_arp_check_expire(struct work_struct *work) unsigned long flags; struct lec_priv *priv = container_of(work, struct lec_priv, lec_arp_work.work); - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; unsigned long now; int i; @@ -1696,7 +1695,7 @@ static void lec_arp_check_expire(struct work_struct *work) restart: spin_lock_irqsave(&priv->lec_arp_lock, 
flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (__lec_arp_check_expire(entry, now, priv)) { struct sk_buff *skb; @@ -1823,14 +1822,14 @@ lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long permanent) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; pr_debug("\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) && (permanent || @@ -1855,7 +1854,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, unsigned int targetless_le_arp) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry, *tmp; int i; @@ -1870,7 +1869,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, * we have no entry in the cache. 
7.1.30 */ if (!hlist_empty(&priv->lec_arp_empty_ones)) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) { hlist_del(&entry->next); @@ -1915,7 +1914,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); del_timer(&entry->timer); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(tmp, node, + hlist_for_each_entry(tmp, &priv->lec_arp_tables[i], next) { if (entry != tmp && !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) { @@ -1956,7 +1955,6 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i, found_entry = 0; @@ -2026,7 +2024,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, ioc_data->atm_addr[16], ioc_data->atm_addr[17], ioc_data->atm_addr[18], ioc_data->atm_addr[19]); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (memcmp (ioc_data->atm_addr, entry->atm_addr, @@ -2103,7 +2101,6 @@ out: static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i; @@ -2111,7 +2108,7 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (entry->flush_tran_id == tran_id && entry->status == ESI_FLUSH_PENDING) { @@ -2140,13 +2137,12 @@ lec_set_flush_tran_id(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long tran_id) { unsigned long flags; - struct 
hlist_node *node; struct lec_arp_table *entry; int i; spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; @@ -2198,7 +2194,7 @@ out: static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; @@ -2208,7 +2204,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (vcc == entry->vcc) { lec_arp_remove(priv, entry); @@ -2219,7 +2215,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (entry->vcc == vcc) { lec_arp_clear_vccs(entry); @@ -2229,7 +2225,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_no_forward, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); @@ -2239,7 +2235,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ @@ -2257,13 +2253,13 @@ lec_arp_check_empties(struct lec_priv *priv, struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table 
*entry, *tmp; struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data; unsigned char *src = hdr->h_source; spin_lock_irqsave(&priv->lec_arp_lock, flags); - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (vcc == entry->vcc) { del_timer(&entry->timer); diff --git a/net/atm/proc.c b/net/atm/proc.c index 0d020de..6ac35ff 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -385,7 +385,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, page = get_zeroed_page(GFP_KERNEL); if (!page) return -ENOMEM; - dev = PDE(file->f_path.dentry->d_inode)->data; + dev = PDE(file_inode(file))->data; if (!dev->ops->proc_read) length = -EINVAL; else { @@ -460,7 +460,7 @@ static void atm_proc_dirs_remove(void) if (e->dirent) remove_proc_entry(e->name, atm_proc_root); } - proc_net_remove(&init_net, "atm"); + remove_proc_entry("atm", init_net.proc_net); } int __init atm_proc_init(void) diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 86767ca..4176887 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -217,7 +217,6 @@ static void purge_vcc(struct atm_vcc *vcc) static void sigd_close(struct atm_vcc *vcc) { - struct hlist_node *node; struct sock *s; int i; @@ -231,7 +230,7 @@ static void sigd_close(struct atm_vcc *vcc) for (i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); purge_vcc(vcc); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 779095d..e277e38 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -81,14 +81,13 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; - struct hlist_node *node; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; spin_lock_bh(&ax25_list_lock); again: - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { s->ax25_dev = NULL; 
spin_unlock_bh(&ax25_list_lock); @@ -158,10 +157,9 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi, struct net_device *dev, int type) { ax25_cb *s; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if ((s->iamdigi && !digi) || (!s->iamdigi && digi)) continue; if (s->sk && !ax25cmp(&s->source_addr, addr) && @@ -187,10 +185,9 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, { struct sock *sk = NULL; ax25_cb *s; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk && !ax25cmp(&s->source_addr, my_addr) && !ax25cmp(&s->dest_addr, dest_addr) && s->sk->sk_type == type) { @@ -213,10 +210,9 @@ ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr, ax25_digi *digi, struct net_device *dev) { ax25_cb *s; - struct hlist_node *node; spin_lock_bh(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk && s->sk->sk_type != SOCK_SEQPACKET) continue; if (s->ax25_dev == NULL) @@ -248,10 +244,9 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto) { ax25_cb *s; struct sk_buff *copy; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 && s->sk->sk_type == SOCK_RAW && s->sk->sk_protocol == proto && @@ -1647,6 +1642,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, ax25_address src; const unsigned char *mac = skb_mac_header(skb); + memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, &digi, NULL, NULL); sax->sax25_family = AF_AX25; @@ -1992,9 +1988,10 @@ static int __init ax25_init(void) dev_add_pack(&ax25_packet_type); register_netdevice_notifier(&ax25_dev_notifier); - 
proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops); - proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops); - proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops); + proc_create("ax25_route", S_IRUGO, init_net.proc_net, + &ax25_route_fops); + proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops); + proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops); out: return rc; } @@ -2008,9 +2005,9 @@ MODULE_ALIAS_NETPROTO(PF_AX25); static void __exit ax25_exit(void) { - proc_net_remove(&init_net, "ax25_route"); - proc_net_remove(&init_net, "ax25"); - proc_net_remove(&init_net, "ax25_calls"); + remove_proc_entry("ax25_route", init_net.proc_net); + remove_proc_entry("ax25", init_net.proc_net); + remove_proc_entry("ax25_calls", init_net.proc_net); unregister_netdevice_notifier(&ax25_dev_notifier); diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index 5ea7fd3..e05bd57 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -39,7 +39,6 @@ void ax25_ds_nr_error_recovery(ax25_cb *ax25) void ax25_ds_enquiry_response(ax25_cb *ax25) { ax25_cb *ax25o; - struct hlist_node *node; /* Please note that neither DK4EG's nor DG2FEF's * DAMA spec mention the following behaviour as seen @@ -80,7 +79,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25) ax25_ds_set_timer(ax25->ax25_dev); spin_lock(&ax25_list_lock); - ax25_for_each(ax25o, node, &ax25_list) { + ax25_for_each(ax25o, &ax25_list) { if (ax25o == ax25) continue; @@ -159,10 +158,9 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev) { ax25_cb *ax25; int res = 0; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(ax25, node, &ax25_list) + ax25_for_each(ax25, &ax25_list) if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) { res = 1; break; diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 993c439..951cd57 100644 --- 
a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -70,7 +70,6 @@ static void ax25_ds_timeout(unsigned long arg) { ax25_dev *ax25_dev = (struct ax25_dev *) arg; ax25_cb *ax25; - struct hlist_node *node; if (ax25_dev == NULL || !ax25_dev->dama.slave) return; /* Yikes! */ @@ -81,7 +80,7 @@ static void ax25_ds_timeout(unsigned long arg) } spin_lock(&ax25_list_lock); - ax25_for_each(ax25, node, &ax25_list) { + ax25_for_each(ax25, &ax25_list) { if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE)) continue; diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 7d5f24b..7f16e8a 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -193,10 +193,9 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) void ax25_link_failed(ax25_cb *ax25, int reason) { struct ax25_linkfail *lf; - struct hlist_node *node; spin_lock_bh(&linkfail_lock); - hlist_for_each_entry(lf, node, &ax25_linkfail_list, lf_node) + hlist_for_each_entry(lf, &ax25_linkfail_list, lf_node) lf->func(ax25, reason); spin_unlock_bh(&linkfail_lock); } diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 957999e..71c4bad 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -54,10 +54,9 @@ EXPORT_SYMBOL(ax25_uid_policy); ax25_uid_assoc *ax25_findbyuid(kuid_t uid) { ax25_uid_assoc *ax25_uid, *res = NULL; - struct hlist_node *node; read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (uid_eq(ax25_uid->uid, uid)) { ax25_uid_hold(ax25_uid); res = ax25_uid; @@ -74,7 +73,6 @@ EXPORT_SYMBOL(ax25_findbyuid); int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) { ax25_uid_assoc *ax25_uid; - struct hlist_node *node; ax25_uid_assoc *user; unsigned long res; @@ -82,7 +80,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) case SIOCAX25GETUID: res = -ENOENT; read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + 
ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { res = from_kuid_munged(current_user_ns(), ax25_uid->uid); break; @@ -126,7 +124,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) ax25_uid = NULL; write_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) break; } @@ -212,11 +210,10 @@ const struct file_operations ax25_uid_fops = { void __exit ax25_uid_free(void) { ax25_uid_assoc *ax25_uid; - struct hlist_node *node; write_lock(&ax25_uid_lock); again: - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { hlist_del_init(&ax25_uid->uid_node); ax25_uid_put(ax25_uid); goto again; diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index a0ba3bf..a4808c2 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 9f3925a..a5bb0a7 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -123,7 +123,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv) unsigned int msecs; msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER; - msecs += (random32() % 2 * BATADV_JITTER); + msecs += prandom_u32() % (2 * BATADV_JITTER); return jiffies + msecs_to_jiffies(msecs); } @@ -131,7 +131,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv) /* when do we schedule a ogm packet to be sent */ static unsigned long batadv_iv_ogm_fwd_send_time(void) { - return jiffies + msecs_to_jiffies(random32() % (BATADV_JITTER / 2)); + return jiffies + msecs_to_jiffies(prandom_u32() % (BATADV_JITTER / 2)); } /* apply hop penalty for a normal link */ @@ -183,7 +183,6 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, batadv_ogm_packet->tt_num_changes)) { - /* we might have aggregated direct link packets with an * ordinary base packet */ @@ -261,7 +260,6 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) */ if ((directlink && (batadv_ogm_packet->header.ttl == 1)) || (forw_packet->own && (forw_packet->if_incoming != primary_if))) { - /* FIXME: what about aggregated packets ? 
*/ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s packet (originator %pM, seqno %u, TTL %d) on interface %s [%pM]\n", @@ -325,7 +323,6 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, if (time_before(send_time, forw_packet->send_time) && time_after_eq(aggregation_end_time, forw_packet->send_time) && (aggregated_bytes <= BATADV_MAX_AGGREGATION_BYTES)) { - /* check aggregation compatibility * -> direct link packets are broadcasted on * their interface only @@ -490,7 +487,6 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, */ struct batadv_forw_packet *forw_packet_aggr = NULL; struct batadv_forw_packet *forw_packet_pos = NULL; - struct hlist_node *tmp_node; struct batadv_ogm_packet *batadv_ogm_packet; bool direct_link; unsigned long max_aggregation_jiffies; @@ -503,7 +499,7 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->forw_bat_list_lock); /* own packets are not to be aggregated */ if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) { - hlist_for_each_entry(forw_packet_pos, tmp_node, + hlist_for_each_entry(forw_packet_pos, &bat_priv->forw_bat_list, list) { if (batadv_iv_ogm_can_aggregate(batadv_ogm_packet, bat_priv, packet_len, @@ -658,7 +654,6 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; - struct hlist_node *node; int if_num; uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; @@ -668,7 +663,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, "update_originator(): Searching and updating originator entry of received packet\n"); rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { neigh_addr = tmp_neigh_node->addr; if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && @@ -804,7 +799,6 @@ static int 
batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; - struct hlist_node *node; uint8_t total_count; uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; @@ -813,9 +807,8 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, /* find corresponding one hop neighbor */ rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_neigh_node->neigh_list, list) { - if (!batadv_compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) continue; @@ -924,7 +917,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_orig_node *orig_node; struct batadv_neigh_node *tmp_neigh_node; - struct hlist_node *node; int is_duplicate = 0; int32_t seq_diff; int need_update = 0; @@ -947,9 +939,8 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, goto out; rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { - is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits, orig_node->last_real_seqno, seqno); @@ -1033,7 +1024,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, is_single_hop_neigh = true; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", + "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %#.4x, changes %u, tq %d, TTL %d, V %d, IDF %d)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig, batadv_ogm_packet->prev_sender, @@ -1223,7 +1214,6 @@ static void 
batadv_iv_ogm_process(const struct ethhdr *ethhdr, /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { - /* mark direct link on incoming interface */ batadv_iv_ogm_forward(orig_node, ethhdr, batadv_ogm_packet, is_single_hop_neigh, @@ -1298,7 +1288,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; /* unpack the aggregated packets and process them one by one */ - do { + while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, + batadv_ogm_packet->tt_num_changes)) { tt_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, tt_buff, @@ -1309,8 +1300,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, packet_pos = packet_buff + buff_pos; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; - } while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, - batadv_ogm_packet->tt_num_changes)); + } kfree_skb(skb); return NET_RX_SUCCESS; diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 5453b17..9739824 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index cebaae7..a81b932 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 5aebe93..6a4f728 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich * @@ -34,13 +34,14 @@ static const uint8_t batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05}; static void batadv_bla_periodic_work(struct work_struct *work); -static void batadv_bla_send_announce(struct batadv_priv *bat_priv, - struct batadv_backbone_gw *backbone_gw); +static void +batadv_bla_send_announce(struct batadv_priv *bat_priv, + struct batadv_bla_backbone_gw *backbone_gw); /* return the index of the claim */ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size) { - struct batadv_claim *claim = (struct batadv_claim *)data; + struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; uint32_t hash = 0; hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr)); @@ -57,7 +58,7 @@ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size) static inline uint32_t batadv_choose_backbone_gw(const void *data, uint32_t size) { - struct batadv_claim *claim = (struct batadv_claim *)data; + const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; uint32_t hash = 0; hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr)); @@ -75,9 +76,9 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data, static int batadv_compare_backbone_gw(const struct hlist_node *node, const void *data2) { - const void *data1 = container_of(node, struct batadv_backbone_gw, + const void *data1 = container_of(node, struct batadv_bla_backbone_gw, hash_entry); - const struct batadv_backbone_gw *gw1 = data1, *gw2 = data2; + const struct batadv_bla_backbone_gw *gw1 = data1, *gw2 = data2; if (!batadv_compare_eth(gw1->orig, gw2->orig)) return 0; @@ -92,9 +93,9 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node, static int batadv_compare_claim(const struct hlist_node *node, const void *data2) { - const void *data1 = container_of(node, struct batadv_claim, + const void *data1 = container_of(node, struct batadv_bla_claim, hash_entry); - const struct batadv_claim 
*cl1 = data1, *cl2 = data2; + const struct batadv_bla_claim *cl1 = data1, *cl2 = data2; if (!batadv_compare_eth(cl1->addr, cl2->addr)) return 0; @@ -106,7 +107,8 @@ static int batadv_compare_claim(const struct hlist_node *node, } /* free a backbone gw */ -static void batadv_backbone_gw_free_ref(struct batadv_backbone_gw *backbone_gw) +static void +batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw) { if (atomic_dec_and_test(&backbone_gw->refcount)) kfree_rcu(backbone_gw, rcu); @@ -115,16 +117,16 @@ static void batadv_backbone_gw_free_ref(struct batadv_backbone_gw *backbone_gw) /* finally deinitialize the claim */ static void batadv_claim_free_rcu(struct rcu_head *rcu) { - struct batadv_claim *claim; + struct batadv_bla_claim *claim; - claim = container_of(rcu, struct batadv_claim, rcu); + claim = container_of(rcu, struct batadv_bla_claim, rcu); batadv_backbone_gw_free_ref(claim->backbone_gw); kfree(claim); } /* free a claim, call claim_free_rcu if its the last reference */ -static void batadv_claim_free_ref(struct batadv_claim *claim) +static void batadv_claim_free_ref(struct batadv_bla_claim *claim) { if (atomic_dec_and_test(&claim->refcount)) call_rcu(&claim->rcu, batadv_claim_free_rcu); @@ -136,14 +138,14 @@ static void batadv_claim_free_ref(struct batadv_claim *claim) * looks for a claim in the hash, and returns it if found * or NULL otherwise. 
*/ -static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, - struct batadv_claim *data) +static struct batadv_bla_claim +*batadv_claim_hash_find(struct batadv_priv *bat_priv, + struct batadv_bla_claim *data) { struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct hlist_head *head; - struct hlist_node *node; - struct batadv_claim *claim; - struct batadv_claim *claim_tmp = NULL; + struct batadv_bla_claim *claim; + struct batadv_bla_claim *claim_tmp = NULL; int index; if (!hash) @@ -153,7 +155,7 @@ static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { if (!batadv_compare_claim(&claim->hash_entry, data)) continue; @@ -176,15 +178,14 @@ static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, * * Returns claim if found or NULL otherwise. */ -static struct batadv_backbone_gw * +static struct batadv_bla_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, uint8_t *addr, short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; - struct hlist_node *node; - struct batadv_backbone_gw search_entry, *backbone_gw; - struct batadv_backbone_gw *backbone_gw_tmp = NULL; + struct batadv_bla_backbone_gw search_entry, *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw_tmp = NULL; int index; if (!hash) @@ -197,7 +198,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (!batadv_compare_backbone_gw(&backbone_gw->hash_entry, &search_entry)) continue; @@ -215,12 +216,12 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, /* delete all claims for a backbone */ static void 
-batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw) +batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_hashtable *hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; - struct batadv_claim *claim; + struct batadv_bla_claim *claim; int i; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -233,14 +234,13 @@ batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(claim, node, node_tmp, + hlist_for_each_entry_safe(claim, node_tmp, head, hash_entry) { - if (claim->backbone_gw != backbone_gw) continue; batadv_claim_free_ref(claim); - hlist_del_rcu(node); + hlist_del_rcu(&claim->hash_entry); } spin_unlock_bh(list_lock); } @@ -338,7 +338,6 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n", ethhdr->h_source, ethhdr->h_dest, vid); break; - } if (vid != -1) @@ -366,11 +365,11 @@ out: * searches for the backbone gw or creates a new one if it could not * be found. 
*/ -static struct batadv_backbone_gw * +static struct batadv_bla_backbone_gw * batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, short vid, bool own_backbone) { - struct batadv_backbone_gw *entry; + struct batadv_bla_backbone_gw *entry; struct batadv_orig_node *orig_node; int hash_added; @@ -437,7 +436,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; backbone_gw = batadv_bla_get_backbone_gw(bat_priv, primary_if->net_dev->dev_addr, @@ -459,11 +458,10 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, short vid) { - struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; - struct batadv_claim *claim; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_claim *claim; + struct batadv_bla_backbone_gw *backbone_gw; int i; batadv_dbg(BATADV_DBG_BLA, bat_priv, @@ -480,7 +478,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { /* only own claims are interesting */ if (claim->backbone_gw != backbone_gw) continue; @@ -502,7 +500,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, * After the request, it will repeat all of his own claims and finally * send an announcement claim with which we can check again. */ -static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw) +static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) { /* first, remove all old entries */ batadv_bla_del_backbone_claims(backbone_gw); @@ -528,7 +526,7 @@ static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw) * places. 
*/ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, - struct batadv_backbone_gw *backbone_gw) + struct batadv_bla_backbone_gw *backbone_gw) { uint8_t mac[ETH_ALEN]; __be16 crc; @@ -539,7 +537,6 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, batadv_bla_send_claim(bat_priv, mac, backbone_gw->vid, BATADV_CLAIM_TYPE_ANNOUNCE); - } /** @@ -551,10 +548,10 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, */ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, const uint8_t *mac, const short vid, - struct batadv_backbone_gw *backbone_gw) + struct batadv_bla_backbone_gw *backbone_gw) { - struct batadv_claim *claim; - struct batadv_claim search_claim; + struct batadv_bla_claim *claim; + struct batadv_bla_claim search_claim; int hash_added; memcpy(search_claim.addr, mac, ETH_ALEN); @@ -598,7 +595,6 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); - } /* set (new) backbone gw */ atomic_inc(&backbone_gw->refcount); @@ -617,7 +613,7 @@ claim_free_ref: static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const uint8_t *mac, const short vid) { - struct batadv_claim search_claim, *claim; + struct batadv_bla_claim search_claim, *claim; memcpy(search_claim.addr, mac, ETH_ALEN); search_claim.vid = vid; @@ -643,7 +639,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, uint8_t *an_addr, uint8_t *backbone_addr, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; uint16_t crc; if (memcmp(an_addr, batadv_announce_mac, 4) != 0) @@ -661,12 +657,12 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, crc = ntohs(*((__be16 *)(&an_addr[4]))); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %04x\n", + "handle_announce(): ANNOUNCE vid %d (sent by %pM)... 
CRC = %#.4x\n", vid, backbone_gw->orig, crc); if (backbone_gw->crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, - "handle_announce(): CRC FAILED for %pM/%d (my = %04x, sent = %04x)\n", + "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", backbone_gw->orig, backbone_gw->vid, backbone_gw->crc, crc); @@ -715,7 +711,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, uint8_t *backbone_addr, uint8_t *claim_addr, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; /* unclaim in any case if it is our own */ if (primary_if && batadv_compare_eth(backbone_addr, @@ -744,7 +740,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, uint8_t *backbone_addr, uint8_t *claim_addr, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; /* register the gateway if not yet available, and add the claim. */ @@ -835,7 +831,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, /* if our mesh friends mac is bigger, use it for ourselves. 
*/ if (ntohs(bla_dst->group) > ntohs(bla_dst_own->group)) { batadv_dbg(BATADV_DBG_BLA, bat_priv, - "taking other backbones claim group: %04x\n", + "taking other backbones claim group: %#.4x\n", ntohs(bla_dst->group)); bla_dst_own->group = bla_dst->group; } @@ -958,8 +954,8 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, */ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) { - struct batadv_backbone_gw *backbone_gw; - struct hlist_node *node, *node_tmp; + struct batadv_bla_backbone_gw *backbone_gw; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_hashtable *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -974,7 +970,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(backbone_gw, node, node_tmp, + hlist_for_each_entry_safe(backbone_gw, node_tmp, head, hash_entry) { if (now) goto purge_now; @@ -993,7 +989,7 @@ purge_now: batadv_bla_del_backbone_claims(backbone_gw); - hlist_del_rcu(node); + hlist_del_rcu(&backbone_gw->hash_entry); batadv_backbone_gw_free_ref(backbone_gw); } spin_unlock_bh(list_lock); @@ -1013,8 +1009,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, int now) { - struct batadv_claim *claim; - struct hlist_node *node; + struct batadv_bla_claim *claim; struct hlist_head *head; struct batadv_hashtable *hash; int i; @@ -1027,7 +1022,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { if (now) goto purge_now; if (!batadv_compare_eth(claim->backbone_gw->orig, @@ -1062,8 +1057,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct batadv_hard_iface 
*oldif) { - struct batadv_backbone_gw *backbone_gw; - struct hlist_node *node; + struct batadv_bla_backbone_gw *backbone_gw; struct hlist_head *head; struct batadv_hashtable *hash; __be16 group; @@ -1087,7 +1081,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { /* own orig still holds the old value. */ if (!batadv_compare_eth(backbone_gw->orig, oldif->net_dev->dev_addr)) @@ -1104,16 +1098,6 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, } } - - -/* (re)start the timer */ -static void batadv_bla_start_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work); - queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, - msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); -} - /* periodic work to do: * * purge structures when they are too old * * send announcements @@ -1123,9 +1107,8 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct delayed_work *delayed_work; struct batadv_priv *bat_priv; struct batadv_priv_bla *priv_bla; - struct hlist_node *node; struct hlist_head *head; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hashtable *hash; struct batadv_hard_iface *primary_if; int i; @@ -1151,7 +1134,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (!batadv_compare_eth(backbone_gw->orig, primary_if->net_dev->dev_addr)) continue; @@ -1184,7 +1167,8 @@ out: if (primary_if) batadv_hardif_free_ref(primary_if); - batadv_bla_start_timer(bat_priv); + queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, + msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); } 
/* The hash for claim and backbone hash receive the same key because they @@ -1242,7 +1226,10 @@ int batadv_bla_init(struct batadv_priv *bat_priv) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hashes initialized\n"); - batadv_bla_start_timer(bat_priv); + INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work); + + queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, + msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); return 0; } @@ -1329,8 +1316,7 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; - struct hlist_node *node; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; int i; if (!atomic_read(&bat_priv->bridge_loop_avoidance)) @@ -1343,7 +1329,7 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (batadv_compare_eth(backbone_gw->orig, orig)) { rcu_read_unlock(); return 1; @@ -1371,7 +1357,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, { struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; short vid = -1; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) @@ -1442,7 +1428,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, bool is_bcast) { struct ethhdr *ethhdr; - struct batadv_claim search_claim, *claim = NULL; + struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; int ret; @@ -1536,7 +1522,7 @@ out: int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) { struct ethhdr *ethhdr; - struct batadv_claim search_claim, *claim = NULL; + struct batadv_bla_claim search_claim, *claim = NULL; struct 
batadv_hard_iface *primary_if; int ret = 0; @@ -1612,9 +1598,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->bla.claim_hash; - struct batadv_claim *claim; + struct batadv_bla_claim *claim; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; bool is_own; @@ -1626,19 +1611,19 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, - "Claims announced for the mesh %s (orig %pM, group id %04x)\n", + "Claims announced for the mesh %s (orig %pM, group id %#.4x)\n", net_dev->name, primary_addr, ntohs(bat_priv->bla.claim_dest.group)); - seq_printf(seq, " %-17s %-5s %-17s [o] (%-4s)\n", + seq_printf(seq, " %-17s %-5s %-17s [o] (%-6s)\n", "Client", "VID", "Originator", "CRC"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); - seq_printf(seq, " * %pM on % 5d by %pM [%c] (%04x)\n", + seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", claim->addr, claim->vid, claim->backbone_gw->orig, (is_own ? 
'x' : ' '), @@ -1657,9 +1642,8 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; int secs, msecs; uint32_t i; @@ -1672,16 +1656,16 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, - "Backbones announced for the mesh %s (orig %pM, group id %04x)\n", + "Backbones announced for the mesh %s (orig %pM, group id %#.4x)\n", net_dev->name, primary_addr, ntohs(bat_priv->bla.claim_dest.group)); - seq_printf(seq, " %-17s %-5s %-9s (%-4s)\n", + seq_printf(seq, " %-17s %-5s %-9s (%-6s)\n", "Originator", "VID", "last seen", "CRC"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime); secs = msecs / 1000; @@ -1693,7 +1677,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) continue; seq_printf(seq, - " * %pM on % 5d % 4i.%03is (%04x)\n", + " * %pM on % 5d % 4i.%03is (%#.4x)\n", backbone_gw->orig, backbone_gw->vid, secs, msecs, backbone_gw->crc); } diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 196d9a0..dea2fbc 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich * diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 6f58ddd..6ae8651 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -40,13 +40,14 @@ static struct dentry *batadv_debugfs; static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN; -static char *batadv_log_char_addr(struct batadv_debug_log *debug_log, +static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log, size_t idx) { return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK]; } -static void batadv_emit_log_char(struct batadv_debug_log *debug_log, char c) +static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log, + char c) { char *char_addr; @@ -59,7 +60,7 @@ static void batadv_emit_log_char(struct batadv_debug_log *debug_log, char c) } __printf(2, 3) -static int batadv_fdebug_log(struct batadv_debug_log *debug_log, +static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log, const char *fmt, ...) 
{ va_list args; @@ -114,7 +115,7 @@ static int batadv_log_release(struct inode *inode, struct file *file) return 0; } -static int batadv_log_empty(struct batadv_debug_log *debug_log) +static int batadv_log_empty(struct batadv_priv_debug_log *debug_log) { return !(debug_log->log_start - debug_log->log_end); } @@ -123,7 +124,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct batadv_priv *bat_priv = file->private_data; - struct batadv_debug_log *debug_log = bat_priv->debug_log; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; int error, i = 0; char *char_addr; char c; @@ -164,7 +165,6 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, buf++; i++; - } spin_unlock_bh(&debug_log->lock); @@ -178,7 +178,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, static unsigned int batadv_log_poll(struct file *file, poll_table *wait) { struct batadv_priv *bat_priv = file->private_data; - struct batadv_debug_log *debug_log = bat_priv->debug_log; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; poll_wait(file, &debug_log->queue_wait, wait); @@ -230,7 +230,6 @@ static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) #else /* CONFIG_BATMAN_ADV_DEBUG */ static int batadv_debug_log_setup(struct batadv_priv *bat_priv) { - bat_priv->debug_log = NULL; return 0; } @@ -397,10 +396,8 @@ err: void batadv_debugfs_destroy(void) { - if (batadv_debugfs) { - debugfs_remove_recursive(batadv_debugfs); - batadv_debugfs = NULL; - } + debugfs_remove_recursive(batadv_debugfs); + batadv_debugfs = NULL; } int batadv_debugfs_add_meshif(struct net_device *dev) diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 3319e1f..f8c3849 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner * diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 8e1d89d..d54188a 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -83,7 +83,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, { spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_dat_entry *dat_entry; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -95,7 +95,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, list_lock = &bat_priv->dat.hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(dat_entry, node, node_tmp, head, + hlist_for_each_entry_safe(dat_entry, node_tmp, head, hash_entry) { /* if an helper function has been passed as parameter, * ask it if the entry has to be purged or not @@ -103,7 +103,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, if (to_purge && !to_purge(dat_entry)) continue; - hlist_del_rcu(node); + hlist_del_rcu(&dat_entry->hash_entry); batadv_dat_entry_free_ref(dat_entry); } spin_unlock_bh(list_lock); @@ -235,7 +235,6 @@ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) { struct hlist_head *head; - struct hlist_node *node; struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL; struct batadv_hashtable *hash = bat_priv->dat.hash; uint32_t index; @@ -247,7 +246,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { if (dat_entry->ip != ip) continue; @@ -440,7 +439,7 @@ static bool batadv_is_orig_node_eligible(struct 
batadv_dat_candidate *res, /* this is an hash collision with the temporary selected node. Choose * the one with the lowest address */ - if ((tmp_max == max) && + if ((tmp_max == max) && max_orig_node && (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)) goto out; @@ -465,7 +464,6 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, batadv_dat_addr_t max = 0, tmp_max = 0; struct batadv_orig_node *orig_node, *max_orig_node = NULL; struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; int i; @@ -481,7 +479,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { /* the dht space is a ring and addresses are unsigned */ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + ip_key; @@ -686,7 +684,6 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hashtable *hash = bat_priv->dat.hash; struct batadv_dat_entry *dat_entry; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; unsigned long last_seen_jiffies; int last_seen_msecs, last_seen_secs, last_seen_mins; @@ -704,7 +701,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { last_seen_jiffies = jiffies - dat_entry->last_update; last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); last_seen_mins = last_seen_msecs / 60000; @@ -738,6 +735,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, struct arphdr *arphdr; struct ethhdr *ethhdr; __be32 ip_src, ip_dst; + uint8_t *hw_src, *hw_dst; uint16_t type = 0; /* pull the ethernet header */ @@ -777,9 +775,23 @@ static uint16_t 
batadv_arp_get_type(struct batadv_priv *bat_priv, ip_src = batadv_arp_ip_src(skb, hdr_size); ip_dst = batadv_arp_ip_dst(skb, hdr_size); if (ipv4_is_loopback(ip_src) || ipv4_is_multicast(ip_src) || - ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst)) + ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst) || + ipv4_is_zeronet(ip_src) || ipv4_is_lbcast(ip_src) || + ipv4_is_zeronet(ip_dst) || ipv4_is_lbcast(ip_dst)) goto out; + hw_src = batadv_arp_hw_src(skb, hdr_size); + if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src)) + goto out; + + /* we don't care about the destination MAC address in ARP requests */ + if (arphdr->ar_op != htons(ARPOP_REQUEST)) { + hw_dst = batadv_arp_hw_dst(skb, hdr_size); + if (is_zero_ether_addr(hw_dst) || + is_multicast_ether_addr(hw_dst)) + goto out; + } + type = ntohs(arphdr->ar_op); out: return type; @@ -1012,6 +1024,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, */ ret = !batadv_is_my_client(bat_priv, hw_dst); out: + if (ret) + kfree_skb(skb); /* if ret == false -> packet has to be delivered to the interface */ return ret; } diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index d060c03..125c8c6 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Antonio Quartulli * diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index dd07c7e..34f99a4 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner * @@ -114,7 +114,6 @@ static struct batadv_gw_node * batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { struct batadv_neigh_node *router; - struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; uint32_t gw_divisor; @@ -127,7 +126,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) gw_divisor *= 64; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; @@ -344,7 +343,6 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, uint8_t new_gwflags) { - struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw; /* Note: We don't need a NULL check here, since curr_gw never gets @@ -355,7 +353,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, curr_gw = batadv_gw_get_selected_gw_node(bat_priv); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->orig_node != orig_node) continue; @@ -403,7 +401,7 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, void batadv_gw_node_purge(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node, *curr_gw; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; unsigned long timeout = msecs_to_jiffies(2 * BATADV_PURGE_TIMEOUT); int do_deselect = 0; @@ -411,7 +409,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) spin_lock_bh(&bat_priv->gw.list_lock); - hlist_for_each_entry_safe(gw_node, node, node_tmp, + hlist_for_each_entry_safe(gw_node, node_tmp, &bat_priv->gw.list, list) { if (((!gw_node->deleted) || (time_before(jiffies, gw_node->deleted + timeout))) && @@ -476,7 +474,6 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) struct batadv_priv *bat_priv = 
netdev_priv(net_dev); struct batadv_hard_iface *primary_if; struct batadv_gw_node *gw_node; - struct hlist_node *node; int gw_count = 0; primary_if = batadv_seq_print_text_primary_if_get(seq); @@ -490,7 +487,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) primary_if->net_dev->dev_addr, net_dev->name); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index f0d129e..039902d 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 9001208..84bb2b1 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 13697f6..509b2bf 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f1d37cd..368219e 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -457,6 +457,24 @@ out: batadv_hardif_free_ref(primary_if); } +/** + * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif + * @work: work queue item + * + * Free the parts of the hard interface which can not be removed under + * rtnl lock (to prevent deadlock situations). + */ +static void batadv_hardif_remove_interface_finish(struct work_struct *work) +{ + struct batadv_hard_iface *hard_iface; + + hard_iface = container_of(work, struct batadv_hard_iface, + cleanup_work); + + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); + batadv_hardif_free_ref(hard_iface); +} + static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { @@ -484,6 +502,9 @@ batadv_hardif_add_interface(struct net_device *net_dev) hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; INIT_LIST_HEAD(&hard_iface->list); + INIT_WORK(&hard_iface->cleanup_work, + batadv_hardif_remove_interface_finish); + /* extra reference for return */ atomic_set(&hard_iface->refcount, 2); @@ -518,8 +539,7 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - batadv_sysfs_del_hardif(&hard_iface->hardif_obj); - batadv_hardif_free_ref(hard_iface); + queue_work(batadv_event_workqueue, &hard_iface->cleanup_work); } void batadv_hardif_remove_interfaces(void) diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 3732366..308437d 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 15a849c..7198daf 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. 
contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index e053339..1b4da72 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -89,7 +89,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash, * * Returns the new hash value. */ -static inline uint32_t batadv_hash_bytes(uint32_t hash, void *data, +static inline uint32_t batadv_hash_bytes(uint32_t hash, const void *data, uint32_t size) { const unsigned char *key = data; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 87ca809..0ba6c89 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 29443a1..1fcca37 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index f65a222..fa563e4 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -169,7 +169,7 @@ void batadv_mesh_free(struct net_device *soft_iface) atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } -int batadv_is_my_mac(const uint8_t *addr) +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr) { const struct batadv_hard_iface *hard_iface; @@ -178,6 +178,9 @@ int batadv_is_my_mac(const uint8_t *addr) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) { rcu_read_unlock(); return 1; @@ -345,9 +348,8 @@ void batadv_recv_handler_unregister(uint8_t packet_type) static struct batadv_algo_ops *batadv_algo_get(char *name) { struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; - struct hlist_node *node; - hlist_for_each_entry(bat_algo_ops_tmp, node, &batadv_algo_list, list) { + hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) { if (strcmp(bat_algo_ops_tmp->name, name) != 0) continue; @@ -411,11 +413,10 @@ out: int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; - struct hlist_node *node; seq_printf(seq, "Available routing algorithms:\n"); - hlist_for_each_entry(bat_algo_ops, node, &batadv_algo_list, list) { + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { seq_printf(seq, "%s\n", bat_algo_ops->name); } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 2f85577..d40910d 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2012.5.0" +#define BATADV_SOURCE_VERSION "2013.1.0" #endif /* B.A.T.M.A.N. 
parameters */ @@ -41,9 +41,11 @@ * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE */ #define BATADV_PURGE_TIMEOUT 200000 /* 200 seconds */ -#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */ +#define BATADV_TT_LOCAL_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ +#define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */ +#define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */ #define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) @@ -160,7 +162,7 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); -int batadv_is_my_mac(const uint8_t *addr); +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_hard_iface * batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, @@ -276,9 +278,7 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx, size_t count) { - int cpu = get_cpu(); - per_cpu_ptr(bat_priv->bat_counters, cpu)[idx] += count; - put_cpu(); + this_cpu_add(bat_priv->bat_counters[idx], count); } #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8c32cf1..96fb80b 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * @@ -29,14 +29,10 @@ #include "soft-interface.h" #include "bridge_loop_avoidance.h" -static void batadv_purge_orig(struct work_struct *work); +/* hash class keys */ +static struct lock_class_key batadv_orig_hash_lock_class_key; -static void batadv_start_purge_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig); - queue_delayed_work(batadv_event_workqueue, - &bat_priv->orig_work, msecs_to_jiffies(1000)); -} +static void batadv_purge_orig(struct work_struct *work); /* returns 1 if they are the same originator */ static int batadv_compare_orig(const struct hlist_node *node, const void *data2) @@ -57,7 +53,14 @@ int batadv_originator_init(struct batadv_priv *bat_priv) if (!bat_priv->orig_hash) goto err; - batadv_start_purge_timer(bat_priv); + batadv_hash_set_lock_class(bat_priv->orig_hash, + &batadv_orig_hash_lock_class_key); + + INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig); + queue_delayed_work(batadv_event_workqueue, + &bat_priv->orig_work, + msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); + return 0; err: @@ -115,7 +118,7 @@ out: static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node, *tmp_neigh_node; struct batadv_orig_node *orig_node; @@ -131,7 +134,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) } /* for all neighbors towards this originator ... 
*/ - hlist_for_each_entry_safe(neigh_node, node, node_tmp, + hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); batadv_neigh_node_free_ref(neigh_node); @@ -158,7 +161,7 @@ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) void batadv_originator_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; @@ -176,10 +179,9 @@ void batadv_originator_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(orig_node, node, node_tmp, + hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { - - hlist_del_rcu(node); + hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_free_ref(orig_node); } spin_unlock_bh(list_lock); @@ -272,7 +274,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_neigh_node **best_neigh_node) { - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; bool neigh_purged = false; unsigned long last_seen; @@ -283,9 +285,8 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... 
*/ - hlist_for_each_entry_safe(neigh_node, node, node_tmp, + hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { - last_seen = neigh_node->last_seen; if_incoming = neigh_node->if_incoming; @@ -293,7 +294,6 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, (if_incoming->if_status == BATADV_IF_INACTIVE) || (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) { - if ((if_incoming->if_status == BATADV_IF_INACTIVE) || (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) @@ -348,7 +348,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, static void _batadv_purge_orig(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; @@ -363,13 +363,13 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(orig_node, node, node_tmp, + hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { if (batadv_purge_orig_node(bat_priv, orig_node)) { if (orig_node->gw_flags) batadv_gw_node_delete(bat_priv, orig_node); - hlist_del_rcu(node); + hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_free_ref(orig_node); continue; } @@ -393,7 +393,9 @@ static void batadv_purge_orig(struct work_struct *work) delayed_work = container_of(work, struct delayed_work, work); bat_priv = container_of(delayed_work, struct batadv_priv, orig_work); _batadv_purge_orig(bat_priv); - batadv_start_purge_timer(bat_priv); + queue_delayed_work(batadv_event_workqueue, + &bat_priv->orig_work, + msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); } void batadv_purge_orig_ref(struct batadv_priv *bat_priv) @@ -406,7 +408,6 @@ int 
batadv_orig_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; struct hlist_head *head; struct batadv_hard_iface *primary_if; struct batadv_orig_node *orig_node; @@ -432,7 +433,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { neigh_node = batadv_orig_node_get_router(orig_node); if (!neigh_node) continue; @@ -451,7 +452,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node->addr, neigh_node->if_incoming->net_dev->name); - hlist_for_each_entry_rcu(neigh_node_tmp, node_tmp, + hlist_for_each_entry_rcu(neigh_node_tmp, &orig_node->neigh_list, list) { seq_printf(seq, " %pM (%3i)", neigh_node_tmp->addr, @@ -509,7 +510,6 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; uint32_t i; @@ -522,7 +522,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = batadv_orig_node_add_if(orig_node, max_if_num); spin_unlock_bh(&orig_node->ogm_cnt_lock); @@ -593,7 +593,6 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct 
batadv_hard_iface *hard_iface_tmp; struct batadv_orig_node *orig_node; @@ -607,7 +606,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = batadv_orig_node_del_if(orig_node, max_if_num, hard_iface->if_num); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 9778e65..7df48fa 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -68,7 +68,6 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_orig_node *orig_node, *orig_node_tmp = NULL; int index; @@ -79,7 +78,7 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { if (!batadv_compare_eth(orig_node, data)) continue; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index cb6405bf..ed0aa89 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c index c8f61e3..ccab0bb 100644 --- a/net/batman-adv/ring_buffer.c +++ b/net/batman-adv/ring_buffer.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner * diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h index fda8c17..3f92ae2 100644 --- a/net/batman-adv/ring_buffer.h +++ b/net/batman-adv/ring_buffer.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 1aa1722..319f290 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -37,7 +37,6 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; unsigned long *word; @@ -49,7 +48,7 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); word_index = hard_iface->if_num * BATADV_NUM_WORDS; word = &(orig_node->bcast_own[word_index]); @@ -80,7 +79,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, /* route added */ } else if ((!curr_router) && (neigh_node)) { - batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Adding route towards: %pM (via %pM)\n", orig_node->orig, neigh_node->addr); @@ -147,7 +145,6 @@ out: void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { - struct hlist_node *node; struct batadv_neigh_node *tmp_neigh_node, *router = NULL; uint8_t interference_candidate = 0; @@ -170,9 +167,8 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, * 
interface. If we do, we won't select this candidate because of * possible interference. */ - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { - if (tmp_neigh_node == neigh_node) continue; @@ -406,7 +402,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, goto out; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) goto out; icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; @@ -420,7 +416,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, } /* packet for me */ - if (batadv_is_my_mac(icmp_packet->dst)) + if (batadv_is_my_mac(bat_priv, icmp_packet->dst)) return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); /* TTL exceeded */ @@ -552,7 +548,8 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, return router; } -static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) +static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) { struct ethhdr *ethhdr; @@ -571,7 +568,7 @@ static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) return -1; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return -1; return 0; @@ -586,7 +583,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) char tt_flag; size_t packet_size; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; /* I could need to modify it */ @@ -618,7 +615,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) case BATADV_TT_RESPONSE: batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX); - if (batadv_is_my_mac(tt_query->dst)) { + if (batadv_is_my_mac(bat_priv, tt_query->dst)) { /* packet needs to be linearized to access the TT * changes */ @@ -661,14 +658,15 @@ int 
batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) struct batadv_roam_adv_packet *roam_adv_packet; struct batadv_orig_node *orig_node; - if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, + sizeof(*roam_adv_packet)) < 0) goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); roam_adv_packet = (struct batadv_roam_adv_packet *)skb->data; - if (!batadv_is_my_mac(roam_adv_packet->dst)) + if (!batadv_is_my_mac(bat_priv, roam_adv_packet->dst)) return batadv_route_unicast_packet(skb, recv_if); /* check if it is a backbone gateway. we don't accept @@ -836,7 +834,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, if (unicast_packet->header.packet_type == BATADV_UNICAST_FRAG && batadv_frag_can_reassemble(skb, neigh_node->if_incoming->net_dev->mtu)) { - ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) @@ -972,7 +969,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * last time) the packet had an updated information or not */ curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); - if (!batadv_is_my_mac(unicast_packet->dest)) { + if (!batadv_is_my_mac(bat_priv, unicast_packet->dest)) { orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); /* if it is not possible to find the orig_node representing the @@ -1049,14 +1046,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) return NET_RX_DROP; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { if (is4addr) { batadv_dat_inc_counter(bat_priv, unicast_4addr_packet->subtype); @@ -1093,7 +1090,7 @@ int batadv_recv_ucast_frag_packet(struct 
sk_buff *skb, struct sk_buff *new_skb = NULL; int ret; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) @@ -1102,8 +1099,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_frag_packet *)skb->data; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { - + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) @@ -1157,13 +1153,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, goto out; /* ignore broadcasts sent by myself */ - if (batadv_is_my_mac(ethhdr->h_source)) + if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto out; bcast_packet = (struct batadv_bcast_packet *)skb->data; /* ignore broadcasts originated by myself */ - if (batadv_is_my_mac(bcast_packet->orig)) + if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; if (bcast_packet->header.ttl < 2) @@ -1249,14 +1245,14 @@ int batadv_recv_vis_packet(struct sk_buff *skb, ethhdr = (struct ethhdr *)skb_mac_header(skb); /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return NET_RX_DROP; /* ignore own packets */ - if (batadv_is_my_mac(vis_packet->vis_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->vis_orig)) return NET_RX_DROP; - if (batadv_is_my_mac(vis_packet->sender_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->sender_orig)) return NET_RX_DROP; switch (vis_packet->vis_type) { diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 9262279..99eeafa 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 4425af9..a67cffd 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -155,8 +155,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, spin_unlock_bh(&bat_priv->forw_bcast_list_lock); /* start timer for this packet */ - INIT_DELAYED_WORK(&forw_packet->delayed_work, - batadv_send_outstanding_bcast_packet); queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work, send_time); } @@ -210,6 +208,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, /* how often did we send the bcast packet ? */ forw_packet->num_packets = 0; + INIT_DELAYED_WORK(&forw_packet->delayed_work, + batadv_send_outstanding_bcast_packet); + _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay); return NETDEV_TX_OK; @@ -315,7 +316,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface) { struct batadv_forw_packet *forw_packet; - struct hlist_node *tmp_node, *safe_tmp_node; + struct hlist_node *safe_tmp_node; bool pending; if (hard_iface) @@ -328,9 +329,8 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, /* free bcast list */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); - hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + hlist_for_each_entry_safe(forw_packet, safe_tmp_node, &bat_priv->forw_bcast_list, list) { - /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ @@ -355,9 +355,8 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, /* free batman packet list */ spin_lock_bh(&bat_priv->forw_bat_list_lock); - hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + 
hlist_for_each_entry_safe(forw_packet, safe_tmp_node, &bat_priv->forw_bat_list, list) { - /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 0078dec..38e662f 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6b548fd..2711e87 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -124,7 +124,6 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX); } - dev->addr_assign_type &= ~NET_ADDR_RANDOM; return 0; } @@ -181,7 +180,8 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; /* Register the client MAC in the transtable */ - batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); + if (!is_multicast_ether_addr(ethhdr->h_source)) + batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... @@ -449,6 +449,30 @@ static void batadv_interface_setup(struct net_device *dev) memset(priv, 0, sizeof(*priv)); } +/** + * batadv_softif_destroy_finish - cleans up the remains of a softif + * @work: work queue item + * + * Free the parts of the soft interface which can not be removed under + * rtnl lock (to prevent deadlock situations). 
+ */ +static void batadv_softif_destroy_finish(struct work_struct *work) +{ + struct batadv_priv *bat_priv; + struct net_device *soft_iface; + + bat_priv = container_of(work, struct batadv_priv, + cleanup_work); + soft_iface = bat_priv->soft_iface; + + batadv_debugfs_del_meshif(soft_iface); + batadv_sysfs_del_meshif(soft_iface); + + rtnl_lock(); + unregister_netdevice(soft_iface); + rtnl_unlock(); +} + struct net_device *batadv_softif_create(const char *name) { struct net_device *soft_iface; @@ -463,6 +487,8 @@ struct net_device *batadv_softif_create(const char *name) goto out; bat_priv = netdev_priv(soft_iface); + bat_priv->soft_iface = soft_iface; + INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called @@ -480,7 +506,9 @@ struct net_device *batadv_softif_create(const char *name) atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); +#ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bridge_loop_avoidance, 0); +#endif #ifdef CONFIG_BATMAN_ADV_DAT atomic_set(&bat_priv->distributed_arp_table, 1); #endif @@ -491,7 +519,9 @@ struct net_device *batadv_softif_create(const char *name) atomic_set(&bat_priv->gw_bandwidth, 41); atomic_set(&bat_priv->orig_interval, 1000); atomic_set(&bat_priv->hop_penalty, 30); +#ifdef CONFIG_BATMAN_ADV_DEBUG atomic_set(&bat_priv->log_level, 0); +#endif atomic_set(&bat_priv->fragmentation, 1); atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN); atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN); @@ -547,10 +577,10 @@ out: void batadv_softif_destroy(struct net_device *soft_iface) { - batadv_debugfs_del_meshif(soft_iface); - batadv_sysfs_del_meshif(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(soft_iface); + batadv_mesh_free(soft_iface); - unregister_netdevice(soft_iface); + queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); } int batadv_softif_is_valid(const 
struct net_device *net_dev) @@ -581,10 +611,10 @@ static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "B.A.T.M.A.N. advanced"); - strcpy(info->version, BATADV_SOURCE_VERSION); - strcpy(info->fw_version, "N/A"); - strcpy(info->bus_info, "batman"); + strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); + strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); } static u32 batadv_get_msglevel(struct net_device *dev) diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 07a08fe..43182e5 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 84a55cb..afbba31 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index 3fd1412..479acf4 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 22457a7..7abee19 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * @@ -29,6 +29,10 @@ #include <linux/crc16.h> +/* hash class keys */ +static struct lock_class_key batadv_tt_local_hash_lock_class_key; +static struct lock_class_key batadv_tt_global_hash_lock_class_key; + static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, struct batadv_orig_node *orig_node); static void batadv_tt_purge(struct work_struct *work); @@ -48,18 +52,10 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } -static void batadv_tt_start_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); - queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, - msecs_to_jiffies(5000)); -} - static struct batadv_tt_common_entry * batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) { struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_common_entry *tt_common_entry_tmp = NULL; uint32_t index; @@ -71,7 +67,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (!batadv_compare_eth(tt_common_entry, data)) continue; @@ -112,7 +108,6 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const void *data) struct batadv_tt_global_entry, common); return tt_global_entry; - } static void @@ -235,6 +230,9 @@ static int batadv_tt_local_init(struct batadv_priv *bat_priv) if (!bat_priv->tt.local_hash) return -ENOMEM; + batadv_hash_set_lock_class(bat_priv->tt.local_hash, + &batadv_tt_local_hash_lock_class_key); + return 0; } @@ -249,7 +247,6 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, 
batadv_choose_orig, tt_global->common.addr); batadv_tt_global_entry_free_ref(tt_global); - } void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, @@ -259,7 +256,6 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry; int hash_added; bool roamed_back = false; @@ -305,7 +301,11 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, (uint8_t)atomic_read(&bat_priv->tt.vn)); memcpy(tt_local->common.addr, addr, ETH_ALEN); - tt_local->common.flags = BATADV_NO_FLAGS; + /* The local entry has to be marked as NEW to avoid to send it in + * a full table response going out before the next ttvn increment + * (consistency check) + */ + tt_local->common.flags = BATADV_TT_CLIENT_NEW; if (batadv_is_wifi_iface(ifindex)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; atomic_set(&tt_local->common.refcount, 2); @@ -316,12 +316,6 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (batadv_compare_eth(addr, soft_iface->dev_addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; - /* The local entry has to be marked as NEW to avoid to send it in - * a full table response going out before the next ttvn increment - * (consistency check) - */ - tt_local->common.flags |= BATADV_TT_CLIENT_NEW; - hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_orig, &tt_local->common, &tt_local->common.hash_entry); @@ -343,7 +337,7 @@ check_roaming: /* These node are probably going to update their tt table */ head = &tt_global->orig_list; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { batadv_send_roam_adv(bat_priv, tt_global->common.addr, orig_entry->orig_node); } @@ -472,37 +466,56 @@ int 
batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common_entry; + struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; + int last_seen_secs; + int last_seen_msecs; + unsigned long last_seen_jiffies; + bool no_purge; + uint16_t np_flag = BATADV_TT_CLIENT_NOPURGE; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) goto out; seq_printf(seq, - "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", - net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn)); + "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.4x):\n", + net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn), + bat_priv->tt.local_crc); + seq_printf(seq, " %-13s %-7s %-10s\n", "Client", "Flags", + "Last seen"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { - seq_printf(seq, " * %pM [%c%c%c%c%c]\n", + tt_local = container_of(tt_common_entry, + struct batadv_tt_local_entry, + common); + last_seen_jiffies = jiffies - tt_local->last_seen; + last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); + last_seen_secs = last_seen_msecs / 1000; + last_seen_msecs = last_seen_msecs % 1000; + + no_purge = tt_common_entry->flags & np_flag; + + seq_printf(seq, " * %pM [%c%c%c%c%c] %3u.%03u\n", tt_common_entry->addr, (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), - (tt_common_entry->flags & - BATADV_TT_CLIENT_NOPURGE ? 'P' : '.'), + no_purge ? 'P' : '.', (tt_common_entry->flags & BATADV_TT_CLIENT_NEW ? 'N' : '.'), (tt_common_entry->flags & BATADV_TT_CLIENT_PENDING ? 'X' : '.'), (tt_common_entry->flags & - BATADV_TT_CLIENT_WIFI ? 
'W' : '.')); + BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + no_purge ? 0 : last_seen_secs, + no_purge ? 0 : last_seen_msecs); } rcu_read_unlock(); } @@ -589,9 +602,9 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, { struct batadv_tt_local_entry *tt_local_entry; struct batadv_tt_common_entry *tt_common_entry; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { tt_local_entry = container_of(tt_common_entry, struct batadv_tt_local_entry, @@ -627,7 +640,6 @@ static void batadv_tt_local_purge(struct batadv_priv *bat_priv) batadv_tt_local_purge_list(bat_priv, head); spin_unlock_bh(list_lock); } - } static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) @@ -636,7 +648,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -650,9 +662,9 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); @@ -676,6 +688,9 @@ static int batadv_tt_global_init(struct batadv_priv *bat_priv) if (!bat_priv->tt.global_hash) return -ENOMEM; + batadv_hash_set_lock_class(bat_priv->tt.global_hash, + &batadv_tt_global_hash_lock_class_key); + return 0; } @@ -706,11 +721,10 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, { struct 
batadv_tt_orig_list_entry *tmp_orig_entry, *orig_entry = NULL; const struct hlist_head *head; - struct hlist_node *node; rcu_read_lock(); head = &entry->orig_list; - hlist_for_each_entry_rcu(tmp_orig_entry, node, head, list) { + hlist_for_each_entry_rcu(tmp_orig_entry, head, list) { if (tmp_orig_entry->orig_node != orig_node) continue; if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) @@ -922,12 +936,11 @@ batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry) { struct batadv_neigh_node *router = NULL; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL; int best_tq = 0; head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { router = batadv_orig_node_get_router(orig_entry->orig_node); if (!router) continue; @@ -955,7 +968,6 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, struct seq_file *seq) { struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry, *best_entry; struct batadv_tt_common_entry *tt_common_entry; uint16_t flags; @@ -967,10 +979,11 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, best_entry = batadv_transtable_best_orig(tt_global_entry); if (best_entry) { last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn); - seq_printf(seq, " %c %pM (%3u) via %pM (%3u) [%c%c%c]\n", + seq_printf(seq, + " %c %pM (%3u) via %pM (%3u) (%#.4x) [%c%c%c]\n", '*', tt_global_entry->common.addr, best_entry->ttvn, best_entry->orig_node->orig, - last_ttvn, + last_ttvn, best_entry->orig_node->tt_crc, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), (flags & BATADV_TT_CLIENT_TEMP ? 
'T' : '.')); @@ -978,7 +991,7 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { if (best_entry == orig_entry) continue; @@ -1001,7 +1014,6 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; @@ -1012,14 +1024,15 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - seq_printf(seq, " %-13s %s %-15s %s %s\n", - "Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags"); + seq_printf(seq, " %-13s %s %-15s %s (%-6s) %s\n", + "Client", "(TTVN)", "Originator", "(Curr TTVN)", "CRC", + "Flags"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -1039,17 +1052,16 @@ static void batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) { struct hlist_head *head; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; - hlist_for_each_entry_safe(orig_entry, node, safe, head, list) { - hlist_del_rcu(node); + hlist_for_each_entry_safe(orig_entry, safe, head, list) { + hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_free_ref(orig_entry); } spin_unlock_bh(&tt_global_entry->list_lock); - } static void @@ -1059,18 +1071,18 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv, const char *message) { 
struct hlist_head *head; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; - hlist_for_each_entry_safe(orig_entry, node, safe, head, list) { + hlist_for_each_entry_safe(orig_entry, safe, head, list) { if (orig_entry->orig_node == orig_node) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting %pM from global tt entry %pM: %s\n", orig_node->orig, tt_global_entry->common.addr, message); - hlist_del_rcu(node); + hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_free_ref(orig_entry); } } @@ -1089,7 +1101,6 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, { bool last_entry = true; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry; /* no local entry exists, case 1: @@ -1098,7 +1109,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, rcu_read_lock(); head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { if (orig_entry->orig_node != orig_node) { last_entry = false; break; @@ -1183,7 +1194,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_tt_common_entry *tt_common_entry; uint32_t i; struct batadv_hashtable *hash = bat_priv->tt.global_hash; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -1195,7 +1206,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, safe, + hlist_for_each_entry_safe(tt_common_entry, safe, head, hash_entry) { tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -1208,7 +1219,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, 
bat_priv, "Deleting global tt entry %pM: %s\n", tt_global->common.addr, message); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); batadv_tt_global_entry_free_ref(tt_global); } } @@ -1243,7 +1254,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_head *head; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; char *msg = NULL; @@ -1255,7 +1266,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, @@ -1268,7 +1279,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) "Deleting global tt entry (%pM): %s\n", tt_global->common.addr, msg); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common->hash_entry); batadv_tt_global_entry_free_ref(tt_global); } @@ -1282,7 +1293,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -1296,9 +1307,9 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, common); @@ -1378,7 +1389,6 @@ static uint16_t 
batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int j; @@ -1387,7 +1397,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, common); @@ -1430,7 +1440,6 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) uint16_t total = 0, total_one; struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int j; @@ -1439,7 +1448,7 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common, head, hash_entry) { /* not yet committed clients have not to be taken into * account while computing the CRC */ @@ -1578,7 +1587,6 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_query_packet *tt_response; struct batadv_tt_change *tt_change; - struct hlist_node *node; struct hlist_head *head; struct sk_buff *skb = NULL; uint16_t tt_tot, tt_count; @@ -1608,7 +1616,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (tt_count == tt_tot) break; @@ -1825,7 +1833,6 @@ out: if (!ret) kfree_skb(skb); return ret; - } static bool @@ -1946,7 +1953,7 @@ out: bool batadv_send_tt_response(struct batadv_priv *bat_priv, 
struct batadv_tt_query_packet *tt_request) { - if (batadv_is_my_mac(tt_request->dst)) { + if (batadv_is_my_mac(bat_priv, tt_request->dst)) { /* don't answer backbone gws! */ if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src)) return true; @@ -2111,7 +2118,9 @@ int batadv_tt_init(struct batadv_priv *bat_priv) if (ret < 0) return ret; - batadv_tt_start_timer(bat_priv); + INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); + queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, + msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); return 1; } @@ -2261,7 +2270,8 @@ static void batadv_tt_purge(struct work_struct *work) batadv_tt_req_purge(bat_priv); batadv_tt_roam_purge(bat_priv); - batadv_tt_start_timer(bat_priv); + queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, + msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); } void batadv_tt_free(struct batadv_priv *bat_priv) @@ -2286,7 +2296,6 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, uint32_t i; uint16_t changed_num = 0; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_common_entry *tt_common_entry; if (!hash) @@ -2296,7 +2305,7 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (enable) { if ((tt_common_entry->flags & flags) == flags) @@ -2321,7 +2330,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; @@ -2334,7 +2343,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; 
spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { if (!(tt_common->flags & BATADV_TT_CLIENT_PENDING)) continue; @@ -2344,7 +2353,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) tt_common->addr); atomic_dec(&bat_priv->tt.local_entry_num); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common->hash_entry); tt_local = container_of(tt_common, struct batadv_tt_local_entry, common); @@ -2352,7 +2361,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) } spin_unlock_bh(list_lock); } - } static int batadv_tt_commit_changes(struct batadv_priv *bat_priv, @@ -2496,7 +2504,7 @@ void batadv_tt_update_orig(struct batadv_priv *bat_priv, orig_node->tt_crc != tt_crc) { request_table: batadv_dbg(BATADV_DBG_TT, bat_priv, - "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %u last_crc: %u num_changes: %u)\n", + "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.4x last_crc: %#.4x num_changes: %u)\n", orig_node->orig, ttvn, orig_ttvn, tt_crc, orig_node->tt_crc, tt_num_changes); batadv_send_tt_request(bat_priv, orig_node, ttvn, @@ -2549,7 +2557,6 @@ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, batadv_tt_local_entry_free_ref(tt_local_entry); out: return ret; - } bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 46d4451..ab8e683 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ae9ac9a..4cd87a0 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -24,6 +24,9 @@ #include "bitarray.h" #include <linux/kernel.h> +/** + * Maximum overhead for the encapsulation for a payload packet + */ #define BATADV_HEADER_LEN \ (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \ sizeof(struct batadv_bcast_packet))) @@ -51,6 +54,22 @@ struct batadv_hard_iface_bat_iv { atomic_t ogm_seqno; }; +/** + * struct batadv_hard_iface - network device known to batman-adv + * @list: list node for batadv_hardif_list + * @if_num: identificator of the interface + * @if_status: status of the interface for batman-adv + * @net_dev: pointer to the net_device + * @frag_seqno: last fragment sequence number sent by this interface + * @hardif_obj: kobject of the per interface sysfs "mesh" directory + * @refcount: number of contexts the object is used + * @batman_adv_ptype: packet type describing packets that should be processed by + * batman-adv for this interface + * @soft_iface: the batman-adv interface which uses this network interface + * @rcu: struct used for freeing in an RCU-safe manner + * @bat_iv: BATMAN IV specific per hard interface data + * @cleanup_work: work queue callback item for hard interface deinit + */ struct batadv_hard_iface { struct list_head list; int16_t if_num; @@ -63,22 +82,52 @@ struct batadv_hard_iface { struct net_device *soft_iface; struct rcu_head rcu; struct batadv_hard_iface_bat_iv bat_iv; + struct work_struct cleanup_work; }; /** - * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh - * @primary_addr: hosts primary interface address - * @last_seen: when last packet from this node was received - * 
@bcast_seqno_reset: time when the broadcast seqno window was reset - * @batman_seqno_reset: time when the batman seqno window was reset - * @gw_flags: flags related to gateway class - * @flags: for now only VIS_SERVER flag - * @last_real_seqno: last and best known sequence number - * @last_ttl: ttl of last received packet - * @last_bcast_seqno: last broadcast sequence number received by this host - * - * @candidates: how many candidates are available - * @selected: next bonding candidate + * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh + * @orig: originator ethernet address + * @primary_addr: hosts primary interface address + * @router: router that should be used to reach this originator + * @batadv_dat_addr_t: address of the orig node in the distributed hash + * @bcast_own: bitfield containing the number of our OGMs this orig_node + * rebroadcasted "back" to us (relative to last_real_seqno) + * @bcast_own_sum: counted result of bcast_own + * @last_seen: time when last packet from this node was received + * @bcast_seqno_reset: time when the broadcast seqno window was reset + * @batman_seqno_reset: time when the batman seqno window was reset + * @gw_flags: flags related to gateway class + * @flags: for now only VIS_SERVER flag + * @last_ttvn: last seen translation table version number + * @tt_crc: CRC of the translation table + * @tt_buff: last tt changeset this node received from the orig node + * @tt_buff_len: length of the last tt changeset this node received from the + * orig node + * @tt_buff_lock: lock that protects tt_buff and tt_buff_len + * @tt_size: number of global TT entries announced by the orig node + * @tt_initialised: bool keeping track of whether or not this node have received + * any translation table information from the orig node yet + * @last_real_seqno: last and best known sequence number + * @last_ttl: ttl of last received packet + * @bcast_bits: bitfield containing the info which payload broadcast originated + * 
from this orig node this host already has seen (relative to + * last_bcast_seqno) + * @last_bcast_seqno: last broadcast sequence number received by this host + * @neigh_list: list of potential next hop neighbor towards this orig node + * @frag_list: fragmentation buffer list for fragment re-assembly + * @last_frag_packet: time when last fragmented packet from this node was + * received + * @neigh_list_lock: lock protecting neigh_list, router and bonding_list + * @hash_entry: hlist node for batadv_priv::orig_hash + * @bat_priv: pointer to soft_iface this orig node belongs to + * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum, + * neigh_node->real_bits & neigh_node->real_packet_count + * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno + * @bond_candidates: how many candidates are available + * @bond_list: list of bonding candidates + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; @@ -94,11 +143,11 @@ struct batadv_orig_node { unsigned long batman_seqno_reset; uint8_t gw_flags; uint8_t flags; - atomic_t last_ttvn; /* last seen translation table version number */ + atomic_t last_ttvn; uint16_t tt_crc; unsigned char *tt_buff; int16_t tt_buff_len; - spinlock_t tt_buff_lock; /* protects tt_buff */ + spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */ atomic_t tt_size; bool tt_initialised; uint32_t last_real_seqno; @@ -107,23 +156,31 @@ struct batadv_orig_node { uint32_t last_bcast_seqno; struct hlist_head neigh_list; struct list_head frag_list; - spinlock_t neigh_list_lock; /* protects neigh_list and router */ - atomic_t refcount; - struct rcu_head rcu; + unsigned long last_frag_packet; + /* neigh_list_lock protects: neigh_list, router & bonding_list */ + spinlock_t neigh_list_lock; struct hlist_node hash_entry; struct batadv_priv *bat_priv; - unsigned long last_frag_packet; /* ogm_cnt_lock protects: bcast_own, bcast_own_sum, 
- * neigh_node->real_bits, neigh_node->real_packet_count + * neigh_node->real_bits & neigh_node->real_packet_count */ spinlock_t ogm_cnt_lock; - /* bcast_seqno_lock protects bcast_bits, last_bcast_seqno */ + /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ spinlock_t bcast_seqno_lock; - spinlock_t tt_list_lock; /* protects tt_list */ atomic_t bond_candidates; struct list_head bond_list; + atomic_t refcount; + struct rcu_head rcu; }; +/** + * struct batadv_gw_node - structure for orig nodes announcing gw capabilities + * @list: list node for batadv_priv_gw::list + * @orig_node: pointer to corresponding orig node + * @deleted: this struct is scheduled for deletion + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ struct batadv_gw_node { struct hlist_node list; struct batadv_orig_node *orig_node; @@ -132,13 +189,28 @@ struct batadv_gw_node { struct rcu_head rcu; }; -/* batadv_neigh_node - * @last_seen: when last packet via this neighbor was received +/** + * struct batadv_neigh_node - structure for single hop neighbors + * @list: list node for batadv_orig_node::neigh_list + * @addr: mac address of neigh node + * @tq_recv: ring buffer of received TQ values from this neigh node + * @tq_index: ring buffer index + * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) + * @last_ttl: last received ttl from this neigh node + * @bonding_list: list node for batadv_orig_node::bond_list + * @last_seen: when last packet via this neighbor was received + * @real_bits: bitfield containing the number of OGMs received from this neigh + * node (relative to orig_node->last_real_seqno) + * @real_packet_count: counted result of real_bits + * @orig_node: pointer to corresponding orig_node + * @if_incoming: pointer to incoming hard interface + * @lq_update_lock: lock protecting tq_recv & tq_index + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe 
manner */ struct batadv_neigh_node { struct hlist_node list; uint8_t addr[ETH_ALEN]; - uint8_t real_packet_count; uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; uint8_t tq_index; uint8_t tq_avg; @@ -146,13 +218,20 @@ struct batadv_neigh_node { struct list_head bonding_list; unsigned long last_seen; DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); - atomic_t refcount; - struct rcu_head rcu; + uint8_t real_packet_count; struct batadv_orig_node *orig_node; struct batadv_hard_iface *if_incoming; - spinlock_t lq_update_lock; /* protects: tq_recv, tq_index */ + spinlock_t lq_update_lock; /* protects tq_recv & tq_index */ + atomic_t refcount; + struct rcu_head rcu; }; +/** + * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression + * @orig: mac address of orig node originating the broadcast + * @crc: crc32 checksum of broadcast payload + * @entrytime: time when the broadcast packet was received + */ #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bcast_duplist_entry { uint8_t orig[ETH_ALEN]; @@ -161,6 +240,33 @@ struct batadv_bcast_duplist_entry { }; #endif +/** + * enum batadv_counters - indices for traffic counters + * @BATADV_CNT_TX: transmitted payload traffic packet counter + * @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter + * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet counter + * @BATADV_CNT_RX: received payload traffic packet counter + * @BATADV_CNT_RX_BYTES: received payload traffic bytes counter + * @BATADV_CNT_FORWARD: forwarded payload traffic packet counter + * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter + * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet counter + * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter + * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter + * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter + * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req 
traffic packet counter + * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter + * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter + * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter + * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet counter + * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter + * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter + * @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter + * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter + * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter + * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet + * counter + * @BATADV_CNT_NUM: number of traffic counters + */ enum batadv_counters { BATADV_CNT_TX, BATADV_CNT_TX_BYTES, @@ -192,14 +298,23 @@ enum batadv_counters { /** * struct batadv_priv_tt - per mesh interface translation table data * @vn: translation table version number + * @ogm_append_cnt: counter of number of OGMs containing the local tt diff * @local_changes: changes registered in an originator interval - * @poss_change: Detect an ongoing roaming phase. If true, then this node - * received a roaming_adv and has to inspect every packet directed to it to - * check whether it still is the true destination or not. 
This flag will be - * reset to false as soon as the this node's ttvn is increased * @changes_list: tracks tt local changes within an originator interval - * @req_list: list of pending tt_requests + * @local_hash: local translation table hash table + * @global_hash: global translation table hash table + * @req_list: list of pending & unanswered tt_requests + * @roam_list: list of the last roaming events of each client limiting the + * number of roaming events to avoid route flapping + * @changes_list_lock: lock protecting changes_list + * @req_list_lock: lock protecting req_list + * @roam_list_lock: lock protecting roam_list + * @local_entry_num: number of entries in the local hash table * @local_crc: Checksum of the local table, recomputed before sending a new OGM + * @last_changeset: last tt changeset this host has generated + * @last_changeset_len: length of last tt changeset this host has generated + * @last_changeset_lock: lock protecting last_changeset & last_changeset_len + * @work: work queue callback item for translation table purging */ struct batadv_priv_tt { atomic_t vn; @@ -217,36 +332,83 @@ struct batadv_priv_tt { uint16_t local_crc; unsigned char *last_changeset; int16_t last_changeset_len; - spinlock_t last_changeset_lock; /* protects last_changeset */ + /* protects last_changeset & last_changeset_len */ + spinlock_t last_changeset_lock; struct delayed_work work; }; +/** + * struct batadv_priv_bla - per mesh interface bridge loop avoidance data + * @num_requests: number of bla requests in flight + * @claim_hash: hash table containing mesh nodes this host has claimed + * @backbone_hash: hash table containing all detected backbone gateways + * @bcast_duplist: recently received broadcast packets array (for broadcast + * duplicate suppression) + * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist + * @bcast_duplist_lock: lock protecting bcast_duplist & bcast_duplist_curr + * @claim_dest: local claim data (e.g. 
claim group) + * @work: work queue callback item for cleanups & bla announcements + */ #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_priv_bla { - atomic_t num_requests; /* number of bla requests in flight */ + atomic_t num_requests; struct batadv_hashtable *claim_hash; struct batadv_hashtable *backbone_hash; struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; int bcast_duplist_curr; - /* protects bcast_duplist and bcast_duplist_curr */ + /* protects bcast_duplist & bcast_duplist_curr */ spinlock_t bcast_duplist_lock; struct batadv_bla_claim_dst claim_dest; struct delayed_work work; }; #endif +/** + * struct batadv_priv_debug_log - debug logging data + * @log_buff: buffer holding the logs (ring buffer) + * @log_start: index of next character to read + * @log_end: index of next character to write + * @lock: lock protecting log_buff, log_start & log_end + * @queue_wait: log reader's wait queue + */ +#ifdef CONFIG_BATMAN_ADV_DEBUG +struct batadv_priv_debug_log { + char log_buff[BATADV_LOG_BUF_LEN]; + unsigned long log_start; + unsigned long log_end; + spinlock_t lock; /* protects log_buff, log_start and log_end */ + wait_queue_head_t queue_wait; +}; +#endif + +/** + * struct batadv_priv_gw - per mesh interface gateway data + * @list: list of available gateway nodes + * @list_lock: lock protecting gw_list & curr_gw + * @curr_gw: pointer to currently selected gateway node + * @reselect: bool indicating a gateway re-selection is in progress + */ struct batadv_priv_gw { struct hlist_head list; - spinlock_t list_lock; /* protects gw_list and curr_gw */ + spinlock_t list_lock; /* protects gw_list & curr_gw */ struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ atomic_t reselect; }; +/** + * struct batadv_priv_vis - per mesh interface vis data + * @send_list: list of batadv_vis_info packets to send + * @hash: hash table containing vis data from other nodes in the network + * @hash_lock: lock protecting the hash table + * @list_lock: lock protecting 
my_info::recv_list + * @work: work queue callback item for vis packet sending + * @my_info: holds this node's vis data sent on a regular basis + */ struct batadv_priv_vis { struct list_head send_list; struct batadv_hashtable *hash; spinlock_t hash_lock; /* protects hash */ - spinlock_t list_lock; /* protects info::recv_list */ + spinlock_t list_lock; /* protects my_info::recv_list */ struct delayed_work work; struct batadv_vis_info *my_info; }; @@ -265,30 +427,78 @@ struct batadv_priv_dat { }; #endif +/** + * struct batadv_priv - per mesh interface data + * @mesh_state: current status of the mesh (inactive/active/deactivating) + * @soft_iface: net device which holds this struct as private data + * @stats: structure holding the data for the ndo_get_stats() call + * @bat_counters: mesh internal traffic statistic counters (see batadv_counters) + * @aggregated_ogms: bool indicating whether OGM aggregation is enabled + * @bonding: bool indicating whether traffic bonding is enabled + * @fragmentation: bool indicating whether traffic fragmentation is enabled + * @ap_isolation: bool indicating whether ap isolation is enabled + * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is + * enabled + * @distributed_arp_table: bool indicating whether distributed ARP table is + * enabled + * @vis_mode: vis operation: client or server (see batadv_vis_packettype) + * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes) + * @gw_sel_class: gateway selection class (applies if gw_mode client) + * @gw_bandwidth: gateway announced bandwidth (applies if gw_mode server) + * @orig_interval: OGM broadcast interval in milliseconds + * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop + * @log_level: configured log level (see batadv_dbg_level) + * @bcast_seqno: last sent broadcast packet sequence number + * @bcast_queue_left: number of remaining buffered broadcast packet slots + * @batman_queue_left: number of remaining OGM 
packet slots + * @num_ifaces: number of interfaces assigned to this mesh interface + * @mesh_obj: kobject for sysfs mesh subdirectory + * @debug_dir: dentry for debugfs batman-adv subdirectory + * @forw_bat_list: list of aggregated OGMs that will be forwarded + * @forw_bcast_list: list of broadcast packets that will be rebroadcasted + * @orig_hash: hash table containing mesh participants (orig nodes) + * @forw_bat_list_lock: lock protecting forw_bat_list + * @forw_bcast_list_lock: lock protecting forw_bcast_list + * @orig_work: work queue callback item for orig node purging + * @cleanup_work: work queue callback item for soft interface deinit + * @primary_if: one of the hard interfaces assigned to this mesh interface + * becomes the primary interface + * @bat_algo_ops: routing algorithm used by this mesh interface + * @bla: bridge loope avoidance data + * @debug_log: holding debug logging relevant data + * @gw: gateway data + * @tt: translation table data + * @vis: vis data + * @dat: distributed arp table data + */ struct batadv_priv { atomic_t mesh_state; + struct net_device *soft_iface; struct net_device_stats stats; uint64_t __percpu *bat_counters; /* Per cpu counters */ - atomic_t aggregated_ogms; /* boolean */ - atomic_t bonding; /* boolean */ - atomic_t fragmentation; /* boolean */ - atomic_t ap_isolation; /* boolean */ - atomic_t bridge_loop_avoidance; /* boolean */ + atomic_t aggregated_ogms; + atomic_t bonding; + atomic_t fragmentation; + atomic_t ap_isolation; +#ifdef CONFIG_BATMAN_ADV_BLA + atomic_t bridge_loop_avoidance; +#endif #ifdef CONFIG_BATMAN_ADV_DAT - atomic_t distributed_arp_table; /* boolean */ + atomic_t distributed_arp_table; +#endif + atomic_t vis_mode; + atomic_t gw_mode; + atomic_t gw_sel_class; + atomic_t gw_bandwidth; + atomic_t orig_interval; + atomic_t hop_penalty; +#ifdef CONFIG_BATMAN_ADV_DEBUG + atomic_t log_level; #endif - atomic_t vis_mode; /* VIS_TYPE_* */ - atomic_t gw_mode; /* GW_MODE_* */ - atomic_t gw_sel_class; /* uint */ - 
atomic_t gw_bandwidth; /* gw bandwidth */ - atomic_t orig_interval; /* uint */ - atomic_t hop_penalty; /* uint */ - atomic_t log_level; /* uint */ atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; char num_ifaces; - struct batadv_debug_log *debug_log; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; @@ -297,11 +507,15 @@ struct batadv_priv { spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */ struct delayed_work orig_work; + struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ struct batadv_algo_ops *bat_algo_ops; #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_priv_bla bla; #endif +#ifdef CONFIG_BATMAN_ADV_DEBUG + struct batadv_priv_debug_log *debug_log; +#endif struct batadv_priv_gw gw; struct batadv_priv_tt tt; struct batadv_priv_vis vis; @@ -310,21 +524,97 @@ struct batadv_priv { #endif }; +/** + * struct batadv_socket_client - layer2 icmp socket client data + * @queue_list: packet queue for packets destined for this socket client + * @queue_len: number of packets in the packet queue (queue_list) + * @index: socket client's index in the batadv_socket_client_hash + * @lock: lock protecting queue_list, queue_len & index + * @queue_wait: socket client's wait queue + * @bat_priv: pointer to soft_iface this client belongs to + */ struct batadv_socket_client { struct list_head queue_list; unsigned int queue_len; unsigned char index; - spinlock_t lock; /* protects queue_list, queue_len, index */ + spinlock_t lock; /* protects queue_list, queue_len & index */ wait_queue_head_t queue_wait; struct batadv_priv *bat_priv; }; +/** + * struct batadv_socket_packet - layer2 icmp packet for socket client + * @list: list node for batadv_socket_client::queue_list + * @icmp_len: size of the layer2 icmp packet + * @icmp_packet: layer2 icmp packet + */ struct batadv_socket_packet { struct list_head list; 
size_t icmp_len; struct batadv_icmp_packet_rr icmp_packet; }; +/** + * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN + * @orig: originator address of backbone node (mac address of primary iface) + * @vid: vlan id this gateway was detected on + * @hash_entry: hlist node for batadv_priv_bla::backbone_hash + * @bat_priv: pointer to soft_iface this backbone gateway belongs to + * @lasttime: last time we heard of this backbone gw + * @wait_periods: grace time for bridge forward delays and bla group forming at + * bootup phase - no bcast traffic is forwarded until it has elapsed + * @request_sent: if this bool is set to true we are out of sync with this + * backbone gateway - no bcast traffic is forwarded until the situation was + * resolved + * @crc: crc16 checksum over all claims + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +#ifdef CONFIG_BATMAN_ADV_BLA +struct batadv_bla_backbone_gw { + uint8_t orig[ETH_ALEN]; + short vid; + struct hlist_node hash_entry; + struct batadv_priv *bat_priv; + unsigned long lasttime; + atomic_t wait_periods; + atomic_t request_sent; + uint16_t crc; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** + * struct batadv_bla_claim - claimed non-mesh client structure + * @addr: mac address of claimed non-mesh client + * @vid: vlan id this client was detected on + * @backbone_gw: pointer to backbone gw claiming this client + * @lasttime: last time we heard of claim (locals only) + * @hash_entry: hlist node for batadv_priv_bla::claim_hash + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +struct batadv_bla_claim { + uint8_t addr[ETH_ALEN]; + short vid; + struct batadv_bla_backbone_gw *backbone_gw; + unsigned long lasttime; + struct hlist_node hash_entry; + struct rcu_head rcu; + atomic_t refcount; +}; +#endif + +/** + * struct batadv_tt_common_entry - tt local & tt global 
common data + * @addr: mac address of non-mesh client + * @hash_entry: hlist node for batadv_priv_tt::local_hash or for + * batadv_priv_tt::global_hash + * @flags: various state handling flags (see batadv_tt_client_flags) + * @added_at: timestamp used for purging stale tt common entries + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ struct batadv_tt_common_entry { uint8_t addr[ETH_ALEN]; struct hlist_node hash_entry; @@ -334,62 +624,76 @@ struct batadv_tt_common_entry { struct rcu_head rcu; }; +/** + * struct batadv_tt_local_entry - translation table local entry data + * @common: general translation table data + * @last_seen: timestamp used for purging stale tt local entries + */ struct batadv_tt_local_entry { struct batadv_tt_common_entry common; unsigned long last_seen; }; +/** + * struct batadv_tt_global_entry - translation table global entry data + * @common: general translation table data + * @orig_list: list of orig nodes announcing this non-mesh client + * @list_lock: lock protecting orig_list + * @roam_at: time at which TT_GLOBAL_ROAM was set + */ struct batadv_tt_global_entry { struct batadv_tt_common_entry common; struct hlist_head orig_list; - spinlock_t list_lock; /* protects the list */ - unsigned long roam_at; /* time at which TT_GLOBAL_ROAM was set */ + spinlock_t list_lock; /* protects orig_list */ + unsigned long roam_at; }; +/** + * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client + * @orig_node: pointer to orig node announcing this non-mesh client + * @ttvn: translation table version number which added the non-mesh client + * @list: list node for batadv_tt_global_entry::orig_list + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ struct batadv_tt_orig_list_entry { struct batadv_orig_node *orig_node; uint8_t ttvn; - atomic_t refcount; - struct rcu_head rcu; struct hlist_node list; -}; - -#ifdef 
CONFIG_BATMAN_ADV_BLA -struct batadv_backbone_gw { - uint8_t orig[ETH_ALEN]; - short vid; /* used VLAN ID */ - struct hlist_node hash_entry; - struct batadv_priv *bat_priv; - unsigned long lasttime; /* last time we heard of this backbone gw */ - atomic_t wait_periods; - atomic_t request_sent; atomic_t refcount; struct rcu_head rcu; - uint16_t crc; /* crc checksum over all claims */ -}; - -struct batadv_claim { - uint8_t addr[ETH_ALEN]; - short vid; - struct batadv_backbone_gw *backbone_gw; - unsigned long lasttime; /* last time we heard of claim (locals only) */ - struct rcu_head rcu; - atomic_t refcount; - struct hlist_node hash_entry; }; -#endif +/** + * struct batadv_tt_change_node - structure for tt changes occurred + * @list: list node for batadv_priv_tt::changes_list + * @change: holds the actual translation table diff data + */ struct batadv_tt_change_node { struct list_head list; struct batadv_tt_change change; }; +/** + * struct batadv_tt_req_node - data to keep track of the tt requests in flight + * @addr: mac address of the originator this request was sent to + * @issued_at: timestamp used for purging stale tt requests + * @list: list node for batadv_priv_tt::req_list + */ struct batadv_tt_req_node { uint8_t addr[ETH_ALEN]; unsigned long issued_at; struct list_head list; }; +/** + * struct batadv_tt_roam_node - roaming client data + * @addr: mac address of the client in the roaming phase + * @counter: number of allowed roaming events per client within a single + * OGM interval (changes are committed with each OGM) + * @first_time: timestamp used for purging stale roaming node entries + * @list: list node for batadv_priv_tt::roam_list + */ struct batadv_tt_roam_node { uint8_t addr[ETH_ALEN]; atomic_t counter; @@ -397,8 +701,19 @@ struct batadv_tt_roam_node { struct list_head list; }; -/* forw_packet - structure for forw_list maintaining packets to be - * send/forwarded +/** + * struct batadv_forw_packet - structure for bcast packets to be 
sent/forwarded + * @list: list node for batadv_socket_client::queue_list + * @send_time: execution time for delayed_work (packet sending) + * @own: bool for locally generated packets (local OGMs are re-scheduled after + * sending) + * @skb: bcast packet's skb buffer + * @packet_len: size of aggregated OGM packet inside the skb buffer + * @direct_link_flags: direct link flags for aggregated OGM packets + * @num_packets: counter for bcast packet retransmission + * @delayed_work: work queue callback item for packet sending + * @if_incoming: pointer incoming hard-iface or primary iface if locally + * generated packet */ struct batadv_forw_packet { struct hlist_node list; @@ -412,72 +727,98 @@ struct batadv_forw_packet { struct batadv_hard_iface *if_incoming; }; -/* While scanning for vis-entries of a particular vis-originator - * this list collects its interfaces to create a subgraph/cluster - * out of them later +/** + * struct batadv_frag_packet_list_entry - storage for fragment packet + * @list: list node for orig_node::frag_list + * @seqno: sequence number of the fragment + * @skb: fragment's skb buffer */ -struct batadv_if_list_entry { - uint8_t addr[ETH_ALEN]; - bool primary; - struct hlist_node list; -}; - -struct batadv_debug_log { - char log_buff[BATADV_LOG_BUF_LEN]; - unsigned long log_start; - unsigned long log_end; - spinlock_t lock; /* protects log_buff, log_start and log_end */ - wait_queue_head_t queue_wait; -}; - struct batadv_frag_packet_list_entry { struct list_head list; uint16_t seqno; struct sk_buff *skb; }; +/** + * struct batadv_vis_info - local data for vis information + * @first_seen: timestamp used for purging stale vis info entries + * @recv_list: List of server-neighbors we have received this packet from. This + * packet should not be re-forward to them again. 
List elements are struct + * batadv_vis_recvlist_node + * @send_list: list of packets to be forwarded + * @refcount: number of contexts the object is used + * @hash_entry: hlist node for batadv_priv_vis::hash + * @bat_priv: pointer to soft_iface this orig node belongs to + * @skb_packet: contains the vis packet + */ struct batadv_vis_info { unsigned long first_seen; - /* list of server-neighbors we received a vis-packet - * from. we should not reply to them. - */ struct list_head recv_list; struct list_head send_list; struct kref refcount; struct hlist_node hash_entry; struct batadv_priv *bat_priv; - /* this packet might be part of the vis send queue. */ struct sk_buff *skb_packet; - /* vis_info may follow here */ } __packed; +/** + * struct batadv_vis_info_entry - contains link information for vis + * @src: source MAC of the link, all zero for local TT entry + * @dest: destination MAC of the link, client mac address for local TT entry + * @quality: transmission quality of the link, or 0 for local TT entry + */ struct batadv_vis_info_entry { uint8_t src[ETH_ALEN]; uint8_t dest[ETH_ALEN]; - uint8_t quality; /* quality = 0 client */ + uint8_t quality; } __packed; -struct batadv_recvlist_node { +/** + * struct batadv_vis_recvlist_node - list entry for batadv_vis_info::recv_list + * @list: list node for batadv_vis_info::recv_list + * @mac: MAC address of the originator from where the vis_info was received + */ +struct batadv_vis_recvlist_node { struct list_head list; uint8_t mac[ETH_ALEN]; }; +/** + * struct batadv_vis_if_list_entry - auxiliary data for vis data generation + * @addr: MAC address of the interface + * @primary: true if this interface is the primary interface + * @list: list node for the interface list + * + * While scanning for vis-entries of a particular vis-originator + * this list collects its interfaces to create a subgraph/cluster + * out of them later + */ +struct batadv_vis_if_list_entry { + uint8_t addr[ETH_ALEN]; + bool primary; + struct hlist_node 
list; +}; + +/** + * struct batadv_algo_ops - mesh algorithm callbacks + * @list: list node for the batadv_algo_list + * @name: name of the algorithm + * @bat_iface_enable: init routing info when hard-interface is enabled + * @bat_iface_disable: de-init routing info when hard-interface is disabled + * @bat_iface_update_mac: (re-)init mac addresses of the protocol information + * belonging to this hard-interface + * @bat_primary_iface_set: called when primary interface is selected / changed + * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue + * @bat_ogm_emit: send scheduled OGM + */ struct batadv_algo_ops { struct hlist_node list; char *name; - /* init routing info when hard-interface is enabled */ int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface); - /* de-init routing info when hard-interface is disabled */ void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); - /* (re-)init mac addresses of the protocol information - * belonging to this hard-interface - */ void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); - /* called when primary interface is selected / changed */ void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface); - /* prepare a new outgoing OGM for the send queue */ void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); - /* send scheduled OGM */ void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); }; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 10aff49..50e079f 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. 
contributors: * * Andreas Langer * @@ -133,7 +133,6 @@ batadv_frag_search_packet(struct list_head *head, is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD); list_for_each_entry(tfp, head, list) { - if (!tfp->skb) continue; @@ -162,7 +161,6 @@ void batadv_frag_list_free(struct list_head *head) struct batadv_frag_packet_list_entry *pf, *tmp_pf; if (!list_empty(head)) { - list_for_each_entry_safe(pf, tmp_pf, head, list) { kfree_skb(pf->skb); list_del(&pf->list); diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 61abba5..429cf8a 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Andreas Langer * diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 0f65a9d..6a1e646 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2008-2013 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich * @@ -28,14 +28,15 @@ #define BATADV_MAX_VIS_PACKET_SIZE 1000 -static void batadv_start_vis_timer(struct batadv_priv *bat_priv); +/* hash class keys */ +static struct lock_class_key batadv_vis_hash_lock_class_key; /* free the info */ static void batadv_free_info(struct kref *ref) { struct batadv_vis_info *info; struct batadv_priv *bat_priv; - struct batadv_recvlist_node *entry, *tmp; + struct batadv_vis_recvlist_node *entry, *tmp; info = container_of(ref, struct batadv_vis_info, refcount); bat_priv = info->bat_priv; @@ -96,7 +97,6 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) { struct batadv_hashtable *hash = bat_priv->vis.hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_vis_info *vis_info, *vis_info_tmp = NULL; uint32_t index; @@ -107,8 +107,8 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) { - if (!batadv_vis_info_cmp(node, data)) + hlist_for_each_entry_rcu(vis_info, head, hash_entry) { + if (!batadv_vis_info_cmp(&vis_info->hash_entry, data)) continue; vis_info_tmp = vis_info; @@ -126,10 +126,9 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface, struct hlist_head *if_list, bool primary) { - struct batadv_if_list_entry *entry; - struct hlist_node *pos; + struct batadv_vis_if_list_entry *entry; - hlist_for_each_entry(entry, pos, if_list, list) { + hlist_for_each_entry(entry, if_list, list) { if (batadv_compare_eth(entry->addr, interface)) return; } @@ -146,10 +145,9 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface, static void batadv_vis_data_read_prim_sec(struct seq_file *seq, const struct hlist_head *if_list) { - struct batadv_if_list_entry *entry; - struct hlist_node *pos; + struct batadv_vis_if_list_entry *entry; - hlist_for_each_entry(entry, pos, if_list, list) { + hlist_for_each_entry(entry, if_list, 
list) { if (entry->primary) seq_printf(seq, "PRIMARY, "); else @@ -196,10 +194,9 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, struct batadv_vis_info_entry *entries) { int i; - struct batadv_if_list_entry *entry; - struct hlist_node *pos; + struct batadv_vis_if_list_entry *entry; - hlist_for_each_entry(entry, pos, list, list) { + hlist_for_each_entry(entry, list, list) { seq_printf(seq, "%pM,", entry->addr); for (i = 0; i < packet->entries; i++) @@ -217,17 +214,16 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, const struct hlist_head *head) { - struct hlist_node *node; struct batadv_vis_info *info; struct batadv_vis_packet *packet; uint8_t *entries_pos; struct batadv_vis_info_entry *entries; - struct batadv_if_list_entry *entry; - struct hlist_node *pos, *n; + struct batadv_vis_if_list_entry *entry; + struct hlist_node *n; HLIST_HEAD(vis_if_list); - hlist_for_each_entry_rcu(info, node, head, hash_entry) { + hlist_for_each_entry_rcu(info, head, hash_entry) { packet = (struct batadv_vis_packet *)info->skb_packet->data; entries_pos = (uint8_t *)packet + sizeof(*packet); entries = (struct batadv_vis_info_entry *)entries_pos; @@ -239,7 +235,7 @@ static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, batadv_vis_data_read_entries(seq, &vis_if_list, packet, entries); - hlist_for_each_entry_safe(entry, pos, n, &vis_if_list, list) { + hlist_for_each_entry_safe(entry, n, &vis_if_list, list) { hlist_del(&entry->list); kfree(entry); } @@ -304,7 +300,7 @@ static void batadv_send_list_del(struct batadv_vis_info *info) static void batadv_recv_list_add(struct batadv_priv *bat_priv, struct list_head *recv_list, const char *mac) { - struct batadv_recvlist_node *entry; + struct batadv_vis_recvlist_node *entry; entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -321,7 +317,7 @@ static int batadv_recv_list_is_in(struct batadv_priv *bat_priv, const struct 
list_head *recv_list, const char *mac) { - const struct batadv_recvlist_node *entry; + const struct batadv_vis_recvlist_node *entry; spin_lock_bh(&bat_priv->vis.list_lock); list_for_each_entry(entry, recv_list, list) { @@ -481,7 +477,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, /* Are we the target for this VIS packet? */ if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && - batadv_is_my_mac(vis_packet->target_orig)) + batadv_is_my_mac(bat_priv, vis_packet->target_orig)) are_target = 1; spin_lock_bh(&bat_priv->vis.hash_lock); @@ -500,7 +496,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, batadv_send_list_add(bat_priv, info); /* ... we're not the recipient (and thus need to forward). */ - } else if (!batadv_is_my_mac(packet->target_orig)) { + } else if (!batadv_is_my_mac(bat_priv, packet->target_orig)) { batadv_send_list_add(bat_priv, info); } @@ -518,7 +514,6 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv, { struct batadv_hashtable *hash = bat_priv->orig_hash; struct batadv_neigh_node *router; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; @@ -531,7 +526,7 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { router = batadv_orig_node_get_router(orig_node); if (!router) continue; @@ -570,7 +565,6 @@ static bool batadv_vis_packet_full(const struct batadv_vis_info *info) static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_neigh_node *router; @@ -604,7 +598,7 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) head = &hash->table[i]; rcu_read_lock(); 
- hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { router = batadv_orig_node_get_router(orig_node); if (!router) continue; @@ -643,7 +637,7 @@ next: head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, head, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { packet_pos = skb_put(info->skb_packet, sizeof(*entry)); entry = (struct batadv_vis_info_entry *)packet_pos; @@ -672,14 +666,14 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) { uint32_t i; struct batadv_hashtable *hash = bat_priv->vis.hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_vis_info *info; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_safe(info, node, node_tmp, + hlist_for_each_entry_safe(info, node_tmp, head, hash_entry) { /* never purge own data. */ if (info == bat_priv->vis.my_info) @@ -687,7 +681,7 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) if (batadv_has_timed_out(info->first_seen, BATADV_VIS_TIMEOUT)) { - hlist_del(node); + hlist_del(&info->hash_entry); batadv_send_list_del(info); kref_put(&info->refcount, batadv_free_info); } @@ -699,7 +693,6 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, struct batadv_vis_info *info) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; @@ -714,7 +707,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { /* if it's a vis server and reachable, send it. 
*/ if (!(orig_node->flags & BATADV_VIS_SERVER)) continue; @@ -827,7 +820,9 @@ static void batadv_send_vis_packets(struct work_struct *work) kref_put(&info->refcount, batadv_free_info); } spin_unlock_bh(&bat_priv->vis.hash_lock); - batadv_start_vis_timer(bat_priv); + + queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, + msecs_to_jiffies(BATADV_VIS_INTERVAL)); } /* init the vis server. this may only be called when if_list is already @@ -852,6 +847,9 @@ int batadv_vis_init(struct batadv_priv *bat_priv) goto err; } + batadv_hash_set_lock_class(bat_priv->vis.hash, + &batadv_vis_hash_lock_class_key); + bat_priv->vis.my_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC); if (!bat_priv->vis.my_info) goto err; @@ -894,7 +892,11 @@ int batadv_vis_init(struct batadv_priv *bat_priv) } spin_unlock_bh(&bat_priv->vis.hash_lock); - batadv_start_vis_timer(bat_priv); + + INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets); + queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, + msecs_to_jiffies(BATADV_VIS_INTERVAL)); + return 0; free_info: @@ -931,11 +933,3 @@ void batadv_vis_quit(struct batadv_priv *bat_priv) bat_priv->vis.my_info = NULL; spin_unlock_bh(&bat_priv->vis.hash_lock); } - -/* schedule packets for (re)transmission */ -static void batadv_start_vis_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets); - queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, - msecs_to_jiffies(BATADV_VIS_INTERVAL)); -} diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h index 873282f..ad92b0e 100644 --- a/net/batman-adv/vis.h +++ b/net/batman-adv/vis.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2008-2013 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 2f67d5e..eb0f4b1 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -290,7 +290,7 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb, goto done; } - mgr->state = READ_LOC_AMP_INFO; + set_bit(READ_LOC_AMP_INFO, &mgr->state); hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); done: @@ -499,8 +499,16 @@ send_rsp: if (hdev) hci_dev_put(hdev); - a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident, sizeof(rsp), - &rsp); + /* Reply error now and success after HCI Write Remote AMP Assoc + command complete with success status + */ + if (rsp.status != A2MP_STATUS_SUCCESS) { + a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident, + sizeof(rsp), &rsp); + } else { + set_bit(WRITE_REMOTE_AMP_ASSOC, &mgr->state); + mgr->ident = hdr->ident; + } skb_pull(skb, le16_to_cpu(hdr->len)); return 0; @@ -840,7 +848,7 @@ struct amp_mgr *amp_mgr_lookup_by_state(u8 state) mutex_lock(&_mgr_list_lock); list_for_each_entry(mgr, &_mgr_list, list) { - if (mgr->state == state) { + if (test_and_clear_bit(state, &mgr->state)) { amp_mgr_get(mgr); mutex_unlock(&_mgr_list_lock); return mgr; @@ -949,6 +957,32 @@ clean: kfree(req); } +void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status) +{ + struct amp_mgr *mgr; + struct a2mp_physlink_rsp rsp; + struct hci_conn *hs_hcon; + + mgr = amp_mgr_lookup_by_state(WRITE_REMOTE_AMP_ASSOC); + if (!mgr) + return; + + hs_hcon = hci_conn_hash_lookup_state(hdev, AMP_LINK, BT_CONNECT); + if (!hs_hcon) { + rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION; + } else { + rsp.remote_id = hs_hcon->remote_id; + rsp.status = A2MP_STATUS_SUCCESS; + } + + BT_DBG("%s mgr %p hs_hcon %p status %u", hdev->name, mgr, hs_hcon, + status); + + rsp.local_id = hdev->id; + a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, mgr->ident, sizeof(rsp), &rsp); + amp_mgr_put(mgr); +} + void a2mp_discover_amp(struct l2cap_chan *chan) { struct 
l2cap_conn *conn = chan->conn; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 5355df6..0d1b08c 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -230,6 +230,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -237,8 +239,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; @@ -641,7 +641,7 @@ int bt_procfs_init(struct module* module, struct net *net, const char *name, sk_list->fops.llseek = seq_lseek; sk_list->fops.release = seq_release_private; - pde = proc_net_fops_create(net, name, 0, &sk_list->fops); + pde = proc_create(name, 0, net->proc_net, &sk_list->fops); if (!pde) return -ENOMEM; @@ -652,7 +652,7 @@ int bt_procfs_init(struct module* module, struct net *net, const char *name, void bt_procfs_cleanup(struct net *net, const char *name) { - proc_net_remove(net, name); + remove_proc_entry(name, net->proc_net); } #else int bt_procfs_init(struct module* module, struct net *net, const char *name, diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index 1b0d92c..d459ed4 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -236,7 +236,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr) cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - mgr->state = READ_LOC_AMP_ASSOC; + set_bit(READ_LOC_AMP_ASSOC, &mgr->state); hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); } @@ -250,7 +250,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev, cp.len_so_far = cpu_to_le16(0); cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - mgr->state = READ_LOC_AMP_ASSOC_FINAL; + set_bit(READ_LOC_AMP_ASSOC_FINAL, &mgr->state); /* Read Local AMP Assoc final link information data 
*/ hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); @@ -317,7 +317,9 @@ void amp_write_rem_assoc_continue(struct hci_dev *hdev, u8 handle) if (!hcon) return; - amp_write_rem_assoc_frag(hdev, hcon); + /* Send A2MP create phylink rsp when all fragments are written */ + if (amp_write_rem_assoc_frag(hdev, hcon)) + a2mp_send_create_phy_link_rsp(hdev, 0); } void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle) @@ -403,26 +405,20 @@ void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon) void amp_create_logical_link(struct l2cap_chan *chan) { + struct hci_conn *hs_hcon = chan->hs_hcon; struct hci_cp_create_accept_logical_link cp; - struct hci_conn *hcon; struct hci_dev *hdev; - BT_DBG("chan %p", chan); + BT_DBG("chan %p hs_hcon %p dst %pMR", chan, hs_hcon, chan->conn->dst); - if (!chan->hs_hcon) + if (!hs_hcon) return; hdev = hci_dev_hold(chan->hs_hcon->hdev); if (!hdev) return; - BT_DBG("chan %p dst %pMR", chan, chan->conn->dst); - - hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, chan->conn->dst); - if (!hcon) - goto done; - - cp.phy_handle = hcon->handle; + cp.phy_handle = hs_hcon->handle; cp.tx_flow_spec.id = chan->local_id; cp.tx_flow_spec.stype = chan->local_stype; @@ -438,14 +434,13 @@ void amp_create_logical_link(struct l2cap_chan *chan) cp.rx_flow_spec.acc_lat = cpu_to_le32(chan->remote_acc_lat); cp.rx_flow_spec.flush_to = cpu_to_le32(chan->remote_flush_to); - if (hcon->out) + if (hs_hcon->out) hci_send_cmd(hdev, HCI_OP_CREATE_LOGICAL_LINK, sizeof(cp), &cp); else hci_send_cmd(hdev, HCI_OP_ACCEPT_LOGICAL_LINK, sizeof(cp), &cp); -done: hci_dev_put(hdev); } diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index a5b6397..e430b1a 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -33,7 +33,6 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -#include <net/bluetooth/l2cap.h> #include "bnep.h" diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c 
index 25bfce0..4925a02 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -249,12 +249,12 @@ static void hci_conn_disconnect(struct hci_conn *conn) __u8 reason = hci_proto_disconn_ind(conn); switch (conn->type) { - case ACL_LINK: - hci_acl_disconn(conn, reason); - break; case AMP_LINK: hci_amp_disconn(conn, reason); break; + default: + hci_acl_disconn(conn, reason); + break; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 596660d..60793e7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1146,7 +1146,8 @@ static void hci_power_on(struct work_struct *work) return; if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) - schedule_delayed_work(&hdev->power_off, HCI_AUTO_OFF_TIMEOUT); + queue_delayed_work(hdev->req_workqueue, &hdev->power_off, + HCI_AUTO_OFF_TIMEOUT); if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) mgmt_index_added(hdev); @@ -1182,14 +1183,10 @@ static void hci_discov_off(struct work_struct *work) int hci_uuids_clear(struct hci_dev *hdev) { - struct list_head *p, *n; - - list_for_each_safe(p, n, &hdev->uuids) { - struct bt_uuid *uuid; - - uuid = list_entry(p, struct bt_uuid, list); + struct bt_uuid *uuid, *tmp; - list_del(p); + list_for_each_entry_safe(uuid, tmp, &hdev->uuids, list) { + list_del(&uuid->list); kfree(uuid); } @@ -1621,8 +1618,8 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, if (err < 0) return err; - schedule_delayed_work(&hdev->le_scan_disable, - msecs_to_jiffies(timeout)); + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + msecs_to_jiffies(timeout)); return 0; } @@ -1799,6 +1796,15 @@ int hci_register_dev(struct hci_dev *hdev) goto err; } + hdev->req_workqueue = alloc_workqueue(hdev->name, + WQ_HIGHPRI | WQ_UNBOUND | + WQ_MEM_RECLAIM, 1); + if (!hdev->req_workqueue) { + destroy_workqueue(hdev->workqueue); + error = -ENOMEM; + goto err; + } + error = hci_add_sysfs(hdev); if (error < 0) goto err_wqueue; @@ -1821,12 +1827,13 @@ int 
hci_register_dev(struct hci_dev *hdev) hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); - schedule_work(&hdev->power_on); + queue_work(hdev->req_workqueue, &hdev->power_on); return id; err_wqueue: destroy_workqueue(hdev->workqueue); + destroy_workqueue(hdev->req_workqueue); err: ida_simple_remove(&hci_index_ida, hdev->id); write_lock(&hci_dev_list_lock); @@ -1880,6 +1887,7 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_del_sysfs(hdev); destroy_workqueue(hdev->workqueue); + destroy_workqueue(hdev->req_workqueue); hci_dev_lock(hdev); hci_blacklist_clear(hdev); @@ -1921,7 +1929,7 @@ int hci_recv_frame(struct sk_buff *skb) return -ENXIO; } - /* Incomming skb */ + /* Incoming skb */ bt_cb(skb)->incoming = 1; /* Time stamp */ @@ -2810,14 +2818,6 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) if (conn) { hci_conn_enter_active_mode(conn, BT_POWER_FORCE_ACTIVE_OFF); - hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags) && - !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) - mgmt_device_connected(hdev, &conn->dst, conn->type, - conn->dst_type, 0, NULL, 0, - conn->dev_class); - hci_dev_unlock(hdev); - /* Send to upper protocol */ l2cap_recv_acldata(conn, skb, flags); return; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 705078a..477726a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -609,8 +609,17 @@ static void le_setup(struct hci_dev *hdev) /* Read LE Buffer Size */ hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); + /* Read LE Local Supported Features */ + hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); + /* Read LE Advertising Channel TX Power */ hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + + /* Read LE White List Size */ + hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); + + /* Read LE Supported States */ + hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); } static void hci_setup(struct hci_dev *hdev) @@ 
-1090,6 +1099,19 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); } +static void hci_cc_le_read_local_features(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_local_features *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (!rp->status) + memcpy(hdev->le_features, rp->features, 8); + + hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status); +} + static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1290,6 +1312,19 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, } } +static void hci_cc_le_read_white_list_size(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_white_list_size *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size); + + if (!rp->status) + hdev->le_white_list_size = rp->size; + + hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status); +} + static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_le_ltk_reply *rp = (void *) skb->data; @@ -1314,6 +1349,19 @@ static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status); } +static void hci_cc_le_read_supported_states(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_supported_states *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (!rp->status) + memcpy(hdev->le_states, rp->le_states, 8); + + hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status); +} + static void hci_cc_write_le_host_supported(struct hci_dev *hdev, struct sk_buff *skb) { @@ -2628,6 +2676,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_buffer_size(hdev, skb); break; + case HCI_OP_LE_READ_LOCAL_FEATURES: + 
hci_cc_le_read_local_features(hdev, skb); + break; + case HCI_OP_LE_READ_ADV_TX_POWER: hci_cc_le_read_adv_tx_power(hdev, skb); break; @@ -2664,6 +2716,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_set_scan_enable(hdev, skb); break; + case HCI_OP_LE_READ_WHITE_LIST_SIZE: + hci_cc_le_read_white_list_size(hdev, skb); + break; + case HCI_OP_LE_LTK_REPLY: hci_cc_le_ltk_reply(hdev, skb); break; @@ -2672,6 +2728,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_ltk_neg_reply(hdev, skb); break; + case HCI_OP_LE_READ_SUPPORTED_STATES: + hci_cc_le_read_supported_states(hdev, skb); + break; + case HCI_OP_WRITE_LE_HOST_SUPPORTED: hci_cc_write_le_host_supported(hdev, skb); break; @@ -2688,7 +2748,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); - if (ev->ncmd) { + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) queue_work(hdev->workqueue, &hdev->cmd_work); @@ -3928,8 +3988,6 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) void *ptr = &skb->data[1]; s8 rssi; - hci_dev_lock(hdev); - while (num_reports--) { struct hci_ev_le_advertising_info *ev = ptr; @@ -3939,8 +3997,6 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) ptr += sizeof(*ev) + ev->length + 1; } - - hci_dev_unlock(hdev); } static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 07f0739..6a93614 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -70,14 +70,13 @@ static struct bt_sock_list hci_sk_list = { void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; struct sk_buff *skb_copy = NULL; BT_DBG("hdev %p len %d", hdev, skb->len); 
read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct hci_filter *flt; struct sk_buff *nskb; @@ -142,13 +141,12 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) { struct sock *sk; - struct hlist_node *node; BT_DBG("len %d", skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; /* Skip the original socket */ @@ -176,7 +174,6 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; struct sk_buff *skb_copy = NULL; __le16 opcode; @@ -210,7 +207,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND) @@ -251,13 +248,12 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) static void send_monitor_event(struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; BT_DBG("len %d", skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND) @@ -393,11 +389,10 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - struct hlist_node *node; /* Detach sockets from device */ read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { bh_lock_sock_nested(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 55cceee..23b4e24 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -2,6 +2,7 
@@ #include <linux/debugfs.h> #include <linux/module.h> +#include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -461,19 +462,18 @@ static const struct file_operations blacklist_fops = { static void print_bt_uuid(struct seq_file *f, u8 *uuid) { - __be32 data0, data4; - __be16 data1, data2, data3, data5; + u32 data0, data5; + u16 data1, data2, data3, data4; - memcpy(&data0, &uuid[0], 4); - memcpy(&data1, &uuid[4], 2); - memcpy(&data2, &uuid[6], 2); - memcpy(&data3, &uuid[8], 2); - memcpy(&data4, &uuid[10], 4); - memcpy(&data5, &uuid[14], 2); + data5 = get_unaligned_le32(uuid); + data4 = get_unaligned_le16(uuid + 4); + data3 = get_unaligned_le16(uuid + 6); + data2 = get_unaligned_le16(uuid + 8); + data1 = get_unaligned_le16(uuid + 10); + data0 = get_unaligned_le32(uuid + 12); - seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n", - ntohl(data0), ntohs(data1), ntohs(data2), ntohs(data3), - ntohl(data4), ntohs(data5)); + seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.4x%.8x\n", + data0, data1, data2, data3, data4, data5); } static int uuids_show(struct seq_file *f, void *p) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index b2bcbe2..a7352ff 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -931,7 +931,7 @@ static int hidp_setup_hid(struct hidp_session *session, hid->version = req->version; hid->country = req->country; - strncpy(hid->name, req->name, 128); + strncpy(hid->name, req->name, sizeof(req->name) - 1); snprintf(hid->phys, sizeof(hid->phys), "%pMR", &bt_sk(session->ctrl_sock->sk)->src); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2c78208..7c7e932 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1527,17 +1527,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan); switch (hcon->type) { - case AMP_LINK: - conn->mtu = hcon->hdev->block_mtu; - break; - 
case LE_LINK: if (hcon->hdev->le_mtu) { conn->mtu = hcon->hdev->le_mtu; break; } /* fall through */ - default: conn->mtu = hcon->hdev->acl_mtu; break; @@ -3727,6 +3722,17 @@ sendresp: static int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) { + struct hci_dev *hdev = conn->hcon->hdev; + struct hci_conn *hcon = conn->hcon; + + hci_dev_lock(hdev); + if (test_bit(HCI_MGMT, &hdev->dev_flags) && + !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags)) + mgmt_device_connected(hdev, &hcon->dst, hcon->type, + hcon->dst_type, 0, NULL, 0, + hcon->dev_class); + hci_dev_unlock(hdev); + l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0); return 0; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f559b96..39395c7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -35,7 +35,7 @@ bool enable_hs; #define MGMT_VERSION 1 -#define MGMT_REVISION 2 +#define MGMT_REVISION 3 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -435,35 +435,117 @@ static u32 get_current_settings(struct hci_dev *hdev) #define PNP_INFO_SVCLASS_ID 0x1200 -static u8 bluetooth_base_uuid[] = { - 0xFB, 0x34, 0x9B, 0x5F, 0x80, 0x00, 0x00, 0x80, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; +static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 4) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + u16 uuid16; + + if (uuid->size != 16) + continue; + + uuid16 = get_unaligned_le16(&uuid->uuid[12]); + if (uuid16 < 0x1100) + continue; + + if (uuid16 == PNP_INFO_SVCLASS_ID) + continue; -static u16 get_uuid16(u8 *uuid128) + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID16_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u16) > len) { + uuids_start[1] = EIR_UUID16_SOME; + break; + } + + *ptr++ = (uuid16 & 0x00ff); + *ptr++ = 
(uuid16 & 0xff00) >> 8; + uuids_start[0] += sizeof(uuid16); + } + + return ptr; +} + +static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) { - u32 val; - int i; + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 6) + return ptr; - for (i = 0; i < 12; i++) { - if (bluetooth_base_uuid[i] != uuid128[i]) - return 0; + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 32) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID32_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u32) > len) { + uuids_start[1] = EIR_UUID32_SOME; + break; + } + + memcpy(ptr, &uuid->uuid[12], sizeof(u32)); + ptr += sizeof(u32); + uuids_start[0] += sizeof(u32); } - val = get_unaligned_le32(&uuid128[12]); - if (val > 0xffff) - return 0; + return ptr; +} + +static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 18) + return ptr; - return (u16) val; + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 128) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID128_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + 16 > len) { + uuids_start[1] = EIR_UUID128_SOME; + break; + } + + memcpy(ptr, uuid->uuid, 16); + ptr += 16; + uuids_start[0] += 16; + } + + return ptr; } static void create_eir(struct hci_dev *hdev, u8 *data) { u8 *ptr = data; - u16 eir_len = 0; - u16 uuid16_list[HCI_MAX_EIR_LENGTH / sizeof(u16)]; - int i, truncated = 0; - struct bt_uuid *uuid; size_t name_len; name_len = strlen(hdev->dev_name); @@ -481,7 +563,6 @@ static void create_eir(struct hci_dev *hdev, u8 *data) memcpy(ptr + 2, hdev->dev_name, name_len); - eir_len += (name_len + 2); ptr += (name_len + 2); } @@ -490,7 +571,6 @@ static void 
create_eir(struct hci_dev *hdev, u8 *data) ptr[1] = EIR_TX_POWER; ptr[2] = (u8) hdev->inq_tx_power; - eir_len += 3; ptr += 3; } @@ -503,60 +583,12 @@ static void create_eir(struct hci_dev *hdev, u8 *data) put_unaligned_le16(hdev->devid_product, ptr + 6); put_unaligned_le16(hdev->devid_version, ptr + 8); - eir_len += 10; ptr += 10; } - memset(uuid16_list, 0, sizeof(uuid16_list)); - - /* Group all UUID16 types */ - list_for_each_entry(uuid, &hdev->uuids, list) { - u16 uuid16; - - uuid16 = get_uuid16(uuid->uuid); - if (uuid16 == 0) - return; - - if (uuid16 < 0x1100) - continue; - - if (uuid16 == PNP_INFO_SVCLASS_ID) - continue; - - /* Stop if not enough space to put next UUID */ - if (eir_len + 2 + sizeof(u16) > HCI_MAX_EIR_LENGTH) { - truncated = 1; - break; - } - - /* Check for duplicates */ - for (i = 0; uuid16_list[i] != 0; i++) - if (uuid16_list[i] == uuid16) - break; - - if (uuid16_list[i] == 0) { - uuid16_list[i] = uuid16; - eir_len += sizeof(u16); - } - } - - if (uuid16_list[0] != 0) { - u8 *length = ptr; - - /* EIR Data type */ - ptr[1] = truncated ? 
EIR_UUID16_SOME : EIR_UUID16_ALL; - - ptr += 2; - eir_len += 2; - - for (i = 0; uuid16_list[i] != 0; i++) { - *ptr++ = (uuid16_list[i] & 0x00ff); - *ptr++ = (uuid16_list[i] & 0xff00) >> 8; - } - - /* EIR Data length */ - *length = (i * sizeof(u16)) + 1; - } + ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); } static int update_eir(struct hci_dev *hdev) @@ -728,13 +760,9 @@ static void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, void *data), void *data) { - struct list_head *p, *n; - - list_for_each_safe(p, n, &hdev->mgmt_pending) { - struct pending_cmd *cmd; - - cmd = list_entry(p, struct pending_cmd, list); + struct pending_cmd *cmd, *tmp; + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; @@ -777,14 +805,19 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { cancel_delayed_work(&hdev->power_off); if (cp->val) { - err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev); - mgmt_powered(hdev, 1); + mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, + data, len); + err = mgmt_powered(hdev, 1); goto failed; } } @@ -807,9 +840,9 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, } if (cp->val) - schedule_work(&hdev->power_on); + queue_work(hdev->req_workqueue, &hdev->power_on); else - schedule_work(&hdev->power_off.work); + queue_work(hdev->req_workqueue, &hdev->power_off.work); err = 0; @@ -872,6 +905,10 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, 
MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); + timeout = __le16_to_cpu(cp->timeout); if (!cp->val && timeout > 0) return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, @@ -971,6 +1008,10 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { @@ -1041,6 +1082,10 @@ static int set_pairable(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_PAIRABLE, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (cp->val) @@ -1073,6 +1118,10 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { @@ -1133,13 +1182,15 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_ssp_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_NOT_SUPPORTED); - if (!lmp_ssp_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_NOT_SUPPORTED); - goto failed; - } + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); val = !!cp->val; @@ -1199,6 +1250,10 @@ static int set_hs(struct sock *sk, struct 
hci_dev *hdev, void *data, u16 len) return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_INVALID_PARAMS); + if (cp->val) set_bit(HCI_HS_ENABLED, &hdev->dev_flags); else @@ -1217,13 +1272,15 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_le_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_NOT_SUPPORTED); - if (!lmp_le_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_NOT_SUPPORTED); - goto unlock; - } + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); val = !!cp->val; enabled = lmp_host_le_capable(hdev); @@ -1275,6 +1332,25 @@ unlock: return err; } +static const u8 bluetooth_base_uuid[] = { + 0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static u8 get_uuid_size(const u8 *uuid) +{ + u32 val; + + if (memcmp(uuid, bluetooth_base_uuid, 12)) + return 128; + + val = get_unaligned_le32(&uuid[12]); + if (val > 0xffff) + return 32; + + return 16; +} + static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_uuid *cp = data; @@ -1300,8 +1376,9 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) memcpy(uuid->uuid, cp->uuid, 16); uuid->svc_hint = cp->svc_hint; + uuid->size = get_uuid_size(cp->uuid); - list_add(&uuid->list, &hdev->uuids); + list_add_tail(&uuid->list, &hdev->uuids); err = update_class(hdev); if (err < 0) @@ -1332,7 +1409,8 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; if (!test_and_set_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { - schedule_delayed_work(&hdev->service_cache, CACHE_TIMEOUT); + 
queue_delayed_work(hdev->workqueue, &hdev->service_cache, + CACHE_TIMEOUT); return true; } @@ -1344,7 +1422,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_remove_uuid *cp = data; struct pending_cmd *cmd; - struct list_head *p, *n; + struct bt_uuid *match, *tmp; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; int err, found; @@ -1372,9 +1450,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, found = 0; - list_for_each_safe(p, n, &hdev->uuids) { - struct bt_uuid *match = list_entry(p, struct bt_uuid, list); - + list_for_each_entry_safe(match, tmp, &hdev->uuids, list) { if (memcmp(match->uuid, cp->uuid, 16) != 0) continue; @@ -1422,13 +1498,19 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_bredr_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_NOT_SUPPORTED); - if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_BUSY); - goto unlock; - } + if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_BUSY); + + if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); hdev->major_class = cp->major; hdev->minor_class = cp->minor; @@ -1483,9 +1565,21 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_STATUS_INVALID_PARAMS); } + if (cp->debug_keys != 0x00 && cp->debug_keys != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); + BT_DBG("%s debug_keys %u key_count %u", hdev->name, cp->debug_keys, key_count); + for (i = 0; i < key_count; i++) { + struct mgmt_link_key_info *key = &cp->keys[i]; + + if (key->addr.type != 
BDADDR_BREDR) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } + hci_dev_lock(hdev); hci_link_keys_clear(hdev); @@ -1533,12 +1627,22 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; int err; - hci_dev_lock(hdev); - memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + if (cp->disconnect != 0x00 && cp->disconnect != 0x01) + return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + hci_dev_lock(hdev); + if (!hdev_is_powered(hdev)) { err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); @@ -1596,6 +1700,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_disconnect *cp = data; + struct mgmt_rp_disconnect rp; struct hci_cp_disconnect dc; struct pending_cmd *cmd; struct hci_conn *conn; @@ -1603,17 +1708,26 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto failed; } if (mgmt_pending_find(MGMT_OP_DISCONNECT, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_BUSY); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto failed; } @@ 
-1624,8 +1738,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_CONNECTED); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); goto failed; } @@ -1903,11 +2017,20 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto unlock; } @@ -1924,10 +2047,6 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr, cp->addr.type, sec_level, auth_type); - memset(&rp, 0, sizeof(rp)); - bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); - rp.addr.type = cp->addr.type; - if (IS_ERR(conn)) { int status; @@ -2254,24 +2373,16 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_NOT_POWERED, &cp->addr, - sizeof(cp->addr)); - goto unlock; - } - err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->hash, cp->randomizer); if (err < 0) status = MGMT_STATUS_FAILED; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); -unlock: hci_dev_unlock(hdev); return err; } @@ -2287,24 +2398,15 
@@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_REMOVE_REMOTE_OOB_DATA, - MGMT_STATUS_NOT_POWERED, &cp->addr, - sizeof(cp->addr)); - goto unlock; - } - err = hci_remove_remote_oob_data(hdev, &cp->addr.bdaddr); if (err < 0) status = MGMT_STATUS_INVALID_PARAMS; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); -unlock: hci_dev_unlock(hdev); return err; } @@ -2365,31 +2467,45 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: - if (lmp_bredr_capable(hdev)) - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); - else - err = -ENOTSUPP; + if (!lmp_bredr_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); break; case DISCOV_TYPE_LE: - if (lmp_host_le_capable(hdev)) - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); - else - err = -ENOTSUPP; + if (!lmp_host_le_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, + LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); break; case DISCOV_TYPE_INTERLEAVED: - if (lmp_host_le_capable(hdev) && lmp_bredr_capable(hdev)) - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, - LE_SCAN_WIN, - LE_SCAN_TIMEOUT_BREDR_LE); - else - err = -ENOTSUPP; + if (!lmp_host_le_capable(hdev) || !lmp_bredr_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, LE_SCAN_WIN, + 
LE_SCAN_TIMEOUT_BREDR_LE); break; default: - err = -EINVAL; + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); + goto failed; } if (err < 0) @@ -2510,7 +2626,8 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_inquiry_cache_update_resolve(hdev, e); } - err = 0; + err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, 0, &cp->addr, + sizeof(cp->addr)); failed: hci_dev_unlock(hdev); @@ -2526,13 +2643,18 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + hci_dev_lock(hdev); err = hci_blacklist_add(hdev, &cp->addr.bdaddr, cp->addr.type); if (err < 0) status = MGMT_STATUS_FAILED; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -2551,13 +2673,18 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + hci_dev_lock(hdev); err = hci_blacklist_del(hdev, &cp->addr.bdaddr, cp->addr.type); if (err < 0) status = MGMT_STATUS_INVALID_PARAMS; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -2612,6 +2739,10 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + if (!hdev_is_powered(hdev)) return cmd_status(sk, 
hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_POWERED); @@ -2659,12 +2790,23 @@ done: return err; } +static bool ltk_is_valid(struct mgmt_ltk_info *key) +{ + if (key->authenticated != 0x00 && key->authenticated != 0x01) + return false; + if (key->master != 0x00 && key->master != 0x01) + return false; + if (!bdaddr_type_is_le(key->addr.type)) + return false; + return true; +} + static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 len) { struct mgmt_cp_load_long_term_keys *cp = cp_data; u16 key_count, expected_len; - int i; + int i, err; key_count = __le16_to_cpu(cp->key_count); @@ -2674,11 +2816,20 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, BT_ERR("load_keys: expected %u bytes, got %u bytes", len, expected_len); return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s key_count %u", hdev->name, key_count); + for (i = 0; i < key_count; i++) { + struct mgmt_ltk_info *key = &cp->keys[i]; + + if (!ltk_is_valid(key)) + return cmd_status(sk, hdev->id, + MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } + hci_dev_lock(hdev); hci_smp_ltks_clear(hdev); @@ -2698,9 +2849,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, key->enc_size, key->ediv, key->rand); } + err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, + NULL, 0); + hci_dev_unlock(hdev); - return 0; + return err; } static const struct mgmt_handler { @@ -2915,6 +3069,8 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); if (powered) { + u8 link_sec; + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && !lmp_host_ssp_capable(hdev)) { u8 ssp = 1; @@ -2938,6 +3094,11 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) sizeof(cp), &cp); } + link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) + 
hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, + sizeof(link_sec), &link_sec); + if (lmp_bredr_capable(hdev)) { set_bredr_scan(hdev); update_class(hdev); @@ -2946,7 +3107,13 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) } } else { u8 status = MGMT_STATUS_NOT_POWERED; + u8 zero_cod[] = { 0, 0, 0 }; + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); + + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) + mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), NULL); } err = new_settings(hdev, match.sk); diff --git a/net/bluetooth/rfcomm/Kconfig b/net/bluetooth/rfcomm/Kconfig index 22e718b..18d352e 100644 --- a/net/bluetooth/rfcomm/Kconfig +++ b/net/bluetooth/rfcomm/Kconfig @@ -12,6 +12,7 @@ config BT_RFCOMM config BT_RFCOMM_TTY bool "RFCOMM TTY support" depends on BT_RFCOMM + depends on TTY help This option enables TTY emulation support for RFCOMM channels. diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 201fdf7..b23e271 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -257,7 +257,7 @@ static void rfcomm_session_clear_timer(struct rfcomm_session *s) { BT_DBG("session %p state %ld", s, s->state); - if (timer_pending(&s->timer) && del_timer(&s->timer)) + if (del_timer(&s->timer)) rfcomm_session_put(s); } @@ -285,7 +285,7 @@ static void rfcomm_dlc_clear_timer(struct rfcomm_dlc *d) { BT_DBG("dlc %p state %ld", d, d->state); - if (timer_pending(&d->timer) && del_timer(&d->timer)) + if (del_timer(&d->timer)) rfcomm_dlc_put(d); } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ce3f665..7c9224b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -107,15 +107,14 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) { struct sock *sk = NULL; - struct hlist_node *node; - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, 
&rfcomm_sk_list.head) { if (rfcomm_pi(sk)->channel == channel && !bacmp(&bt_sk(sk)->src, src)) break; } - return node ? sk : NULL; + return sk ? sk : NULL; } /* Find socket with channel and source bdaddr. @@ -124,11 +123,10 @@ static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; - struct hlist_node *node; read_lock(&rfcomm_sk_list.lock); - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { if (state && sk->sk_state != state) continue; @@ -145,7 +143,7 @@ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t * read_unlock(&rfcomm_sk_list.lock); - return node ? sk : sk1; + return sk ? sk : sk1; } static void rfcomm_sock_destruct(struct sock *sk) @@ -610,6 +608,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); + msg->msg_namelen = 0; return 0; } @@ -970,11 +969,10 @@ done: static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; - struct hlist_node *node; read_lock(&rfcomm_sk_list.lock); - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { seq_printf(f, "%pMR %pMR %d %d\n", &bt_sk(sk)->src, &bt_sk(sk)->dst, sk->sk_state, rfcomm_pi(sk)->channel); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index bd6fd0f..b6e44ad 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -541,23 +541,21 @@ int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) { struct rfcomm_dev *dev = dlc->owner; - struct tty_struct *tty; if (!dev) { kfree_skb(skb); return; } - tty = dev->port.tty; - if (!tty || !skb_queue_empty(&dev->pending)) { + if (!skb_queue_empty(&dev->pending)) { 
skb_queue_tail(&dev->pending, skb); return; } - BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len); + BT_DBG("dlc %p len %d", dlc, skb->len); - tty_insert_flip_string(tty, skb->data, skb->len); - tty_flip_buffer_push(tty); + tty_insert_flip_string(&dev->port, skb->data, skb->len); + tty_flip_buffer_push(&dev->port); kfree_skb(skb); } @@ -621,26 +619,23 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) /* ---- TTY functions ---- */ static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev) { - struct tty_struct *tty = dev->port.tty; struct sk_buff *skb; int inserted = 0; - if (!tty) - return; - - BT_DBG("dev %p tty %p", dev, tty); + BT_DBG("dev %p", dev); rfcomm_dlc_lock(dev->dlc); while ((skb = skb_dequeue(&dev->pending))) { - inserted += tty_insert_flip_string(tty, skb->data, skb->len); + inserted += tty_insert_flip_string(&dev->port, skb->data, + skb->len); kfree_skb(skb); } rfcomm_dlc_unlock(dev->dlc); if (inserted > 0) - tty_flip_buffer_push(tty); + tty_flip_buffer_push(&dev->port); } static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 531a93d..fb6192c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -259,10 +259,9 @@ drop: /* -------- Socket interface ---------- */ static struct sock *__sco_get_sock_listen_by_addr(bdaddr_t *ba) { - struct hlist_node *node; struct sock *sk; - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -279,11 +278,10 @@ static struct sock *__sco_get_sock_listen_by_addr(bdaddr_t *ba) static struct sock *sco_get_sock_listen(bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; - struct hlist_node *node; read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -298,7 +296,7 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src) 
read_unlock(&sco_sk_list.lock); - return node ? sk : sk1; + return sk ? sk : sk1; } static void sco_sock_destruct(struct sock *sk) @@ -352,7 +350,7 @@ static void __sco_sock_close(struct sock *sk) case BT_CONNECTED: case BT_CONFIG: - if (sco_pi(sk)->conn) { + if (sco_pi(sk)->conn->hcon) { sk->sk_state = BT_DISCONN; sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); hci_conn_put(sco_pi(sk)->conn->hcon); @@ -361,6 +359,7 @@ static void __sco_sock_close(struct sock *sk) sco_chan_del(sk, ECONNRESET); break; + case BT_CONNECT2: case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); @@ -666,6 +665,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { hci_conn_accept(pi->conn->hcon, 0); sk->sk_state = BT_CONFIG; + msg->msg_namelen = 0; release_sock(sk); return 0; @@ -900,8 +900,6 @@ static void sco_conn_ready(struct sco_conn *conn) BT_DBG("conn %p", conn); - sco_conn_lock(conn); - if (sk) { sco_sock_clear_timer(sk); bh_lock_sock(sk); @@ -909,9 +907,13 @@ static void sco_conn_ready(struct sco_conn *conn) sk->sk_state_change(sk); bh_unlock_sock(sk); } else { + sco_conn_lock(conn); + parent = sco_get_sock_listen(conn->src); - if (!parent) - goto done; + if (!parent) { + sco_conn_unlock(conn); + return; + } bh_lock_sock(parent); @@ -919,7 +921,8 @@ static void sco_conn_ready(struct sco_conn *conn) BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); - goto done; + sco_conn_unlock(conn); + return; } sco_sock_init(sk, parent); @@ -939,24 +942,22 @@ static void sco_conn_ready(struct sco_conn *conn) parent->sk_data_ready(parent, 1); bh_unlock_sock(parent); - } -done: - sco_conn_unlock(conn); + sco_conn_unlock(conn); + } } /* ----- SCO interface with lower layer (HCI) ----- */ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) { struct sock *sk; - struct hlist_node *node; int lm = 0; BT_DBG("hdev %s, bdaddr %pMR", hdev->name, bdaddr); /* Find listening sockets */ 
read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -1016,11 +1017,10 @@ drop: static int sco_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; - struct hlist_node *node; read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { seq_printf(f, "%pMR %pMR %d\n", &bt_sk(sk)->src, &bt_sk(sk)->dst, sk->sk_state); } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 68a9587..5abefb1 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -859,6 +859,19 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) skb_pull(skb, sizeof(code)); + /* + * The SMP context must be initialized for all other PDUs except + * pairing and security requests. If we get any other PDU when + * not initialized simply disconnect (done if this function + * returns an error). + */ + if (code != SMP_CMD_PAIRING_REQ && code != SMP_CMD_SECURITY_REQ && + !conn->smp_chan) { + BT_ERR("Unexpected SMP command 0x%02x. Disconnecting.", code); + kfree_skb(skb); + return -ENOTSUPP; + } + switch (code) { case SMP_CMD_PAIRING_REQ: reason = smp_cmd_pairing_req(conn, skb); diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 6dee7bf..aa0d3b2 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -46,3 +46,17 @@ config BRIDGE_IGMP_SNOOPING Say N to exclude this support and reduce the binary size. If unsure, say Y. + +config BRIDGE_VLAN_FILTERING + bool "VLAN filtering" + depends on BRIDGE + depends on VLAN_8021Q + default n + ---help--- + If you say Y here, then the Ethernet bridge will be able selectively + receive and forward traffic based on VLAN information in the packet + any VLAN information configured on the bridge port or bridge device. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. 
diff --git a/net/bridge/Makefile b/net/bridge/Makefile index e859098..e85498b2f 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -14,4 +14,6 @@ bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o +bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 7c78e26..314c73e 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -30,6 +30,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + u16 vid = 0; rcu_read_lock(); #ifdef CONFIG_BRIDGE_NETFILTER @@ -45,6 +46,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + goto out; + BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); @@ -62,12 +66,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } - mdst = br_mdb_get(br, skb); + mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb); - } else if ((dst = __br_fdb_get(br, dest)) != NULL) + } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else br_flood_deliver(br, skb); @@ -172,12 +176,10 @@ static int br_set_mac_address(struct net_device *dev, void *p) spin_lock_bh(&br->lock); if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { - dev->addr_assign_type &= ~NET_ADDR_RANDOM; memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); br_fdb_change_mac_address(br, addr->sa_data); br_stp_change_bridge_id(br, addr->sa_data); } - br->flags |= BR_SET_MAC_ADDR; spin_unlock_bh(&br->lock); return 0; @@ -185,10 +187,10 @@ static 
int br_set_mac_address(struct net_device *dev, void *p) static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "bridge"); - strcpy(info->version, BR_VERSION); - strcpy(info->fw_version, "N/A"); - strcpy(info->bus_info, "N/A"); + strlcpy(info->driver, "bridge", sizeof(info->driver)); + strlcpy(info->version, BR_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "N/A", sizeof(info->bus_info)); } static netdev_features_t br_fix_features(struct net_device *dev, @@ -267,7 +269,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - __netpoll_free_rcu(np); + __netpoll_free_async(np); } #endif @@ -315,6 +317,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_dump = br_fdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d9576e6..bab338e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,11 +23,12 @@ #include <linux/slab.h> #include <linux/atomic.h> #include <asm/unaligned.h> +#include <linux/if_vlan.h> #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *, int); @@ -67,11 +68,11 @@ static inline int has_expired(const struct net_bridge *br, time_before_eq(fdb->updated + hold_time(br), jiffies); } -static inline int br_mac_hash(const unsigned char *mac) +static inline int br_mac_hash(const unsigned char *mac, __u16 vid) { - /* use 1 byte of OUI cnd 3 bytes of NIC */ + /* use 1 byte of OUI and 3 bytes of NIC */ u32 key = get_unaligned((u32 *)(mac + 2)); - return jhash_1word(key, fdb_salt) & 
(BR_HASH_SIZE - 1); + return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1); } static void fdb_rcu_free(struct rcu_head *head) @@ -91,6 +92,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { struct net_bridge *br = p->br; + bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false; int i; spin_lock_bh(&br->hash_lock); @@ -105,10 +107,12 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) if (f->dst == p && f->is_local) { /* maybe another port has same hw addr? */ struct net_bridge_port *op; + u16 vid = f->vlan_id; list_for_each_entry(op, &br->port_list, list) { if (op != p && ether_addr_equal(op->dev->dev_addr, - f->addr.addr)) { + f->addr.addr) && + nbp_vlan_find(op, vid)) { f->dst = op; goto insert; } @@ -116,27 +120,55 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) /* delete old one */ fdb_delete(br, f); - goto insert; +insert: + /* insert new address, may fail if invalid + * address or dup. + */ + fdb_insert(br, p, newaddr, vid); + + /* if this port has no vlan information + * configured, we can safely be done at + * this point. + */ + if (no_vlan) + goto done; } } } - insert: - /* insert new address, may fail if invalid address or dup. */ - fdb_insert(br, p, newaddr); +done: spin_unlock_bh(&br->hash_lock); } void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) { struct net_bridge_fdb_entry *f; + struct net_port_vlans *pv; + u16 vid = 0; /* If old entry was unassociated with any port, then delete it. */ - f = __br_fdb_get(br, br->dev->dev_addr); + f = __br_fdb_get(br, br->dev->dev_addr, 0); if (f && f->is_local && !f->dst) fdb_delete(br, f); - fdb_insert(br, NULL, newaddr); + fdb_insert(br, NULL, newaddr, 0); + + /* Now remove and add entries for every VLAN configured on the + * bridge. 
This function runs under RTNL so the bitmap will not + * change from under us. + */ + pv = br_get_vlan_info(br); + if (!pv) + return; + + for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) { + f = __br_fdb_get(br, br->dev->dev_addr, vid); + if (f && f->is_local && !f->dst) + fdb_delete(br, f); + fdb_insert(br, NULL, newaddr, vid); + } } void br_fdb_cleanup(unsigned long _data) @@ -149,9 +181,9 @@ void br_fdb_cleanup(unsigned long _data) spin_lock(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; if (f->is_static) continue; @@ -175,8 +207,8 @@ void br_fdb_flush(struct net_bridge *br) spin_lock_bh(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; - struct hlist_node *h, *n; - hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { + struct hlist_node *n; + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { if (!f->is_static) fdb_delete(br, f); } @@ -231,13 +263,15 @@ void br_fdb_delete_by_port(struct net_bridge *br, /* No locking or refcounting, assumes caller has rcu_read_lock */ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) { + hlist_for_each_entry_rcu(fdb, + &br->hash[br_mac_hash(addr, vid)], hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) { if (unlikely(has_expired(br, fdb))) break; return fdb; @@ -261,7 +295,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) if (!port) 
ret = 0; else { - fdb = __br_fdb_get(port->br, addr); + fdb = __br_fdb_get(port->br, addr, 0); ret = fdb && fdb->dst && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; } @@ -280,14 +314,13 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, { struct __fdb_entry *fe = buf; int i, num = 0; - struct hlist_node *h; struct net_bridge_fdb_entry *f; memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); rcu_read_lock(); for (i = 0; i < BR_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { if (num >= maxnum) goto out; @@ -325,26 +358,28 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, } static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry(fdb, h, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) + hlist_for_each_entry(fdb, head, hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) return fdb; } return NULL; } static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) + hlist_for_each_entry_rcu(fdb, head, hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) return fdb; } return NULL; @@ -352,7 +387,8 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct net_bridge_fdb_entry *fdb; @@ -360,6 +396,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, if (fdb) { memcpy(fdb->addr.addr, 
addr, ETH_ALEN); fdb->dst = source; + fdb->vlan_id = vid; fdb->is_local = 0; fdb->is_static = 0; fdb->updated = fdb->used = jiffies; @@ -369,15 +406,15 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, } static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; if (!is_valid_ether_addr(addr)) return -EINVAL; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vid); if (fdb) { /* it is okay to have multiple ports with same * address, just use the first one. @@ -386,11 +423,11 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return 0; br_warn(br, "adding interface %s with same address " "as a received packet\n", - source->dev->name); + source ? source->dev->name : br->dev->name); fdb_delete(br, fdb); } - fdb = fdb_create(head, source, addr); + fdb = fdb_create(head, source, addr, vid); if (!fdb) return -ENOMEM; @@ -401,20 +438,20 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, /* Add entry for local address of interface */ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { int ret; spin_lock_bh(&br->hash_lock); - ret = fdb_insert(br, source, addr); + ret = fdb_insert(br, source, addr, vid); spin_unlock_bh(&br->hash_lock); return ret; } void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; /* some users want to always flood. 
*/ @@ -426,7 +463,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, source->state == BR_STATE_FORWARDING)) return; - fdb = fdb_find_rcu(head, addr); + fdb = fdb_find_rcu(head, addr, vid); if (likely(fdb)) { /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { @@ -441,8 +478,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } else { spin_lock(&br->hash_lock); - if (likely(!fdb_find(head, addr))) { - fdb = fdb_create(head, source, addr); + if (likely(!fdb_find(head, addr, vid))) { + fdb = fdb_create(head, source, addr, vid); if (fdb) fdb_notify(br, fdb, RTM_NEWNEIGH); } @@ -495,6 +532,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -506,6 +547,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -547,10 +589,9 @@ int br_fdb_dump(struct sk_buff *skb, goto out; for (i = 0; i < BR_HASH_SIZE; i++) { - struct hlist_node *h; struct net_bridge_fdb_entry *f; - hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { if (idx < cb->args[0]) goto skip; @@ -571,18 +612,18 @@ out: /* Update (create or replace) forwarding database entry */ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, - __u16 state, __u16 flags) + __u16 state, __u16 flags, __u16 vid) { struct net_bridge *br = source->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr); + fdb = 
fdb_find(head, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(head, source, addr); + fdb = fdb_create(head, source, addr, vid); if (!fdb) return -ENOMEM; fdb_notify(br, fdb, RTM_NEWNEIGH); @@ -607,6 +648,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -614,12 +674,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct net_bridge_port *p; int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -627,40 +704,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (ndm->ndm_flags & NTF_USE) { - rcu_read_lock(); - br_fdb_update(p->br, p, addr); - rcu_read_unlock(); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: 
RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags); - spin_unlock_bh(&p->br->hash_lock); + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } } +out: return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vlan); if (!fdb) return -ENOENT; - fdb_delete(p->br, fdb); + fdb_delete(br, fdb); return 0; } +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + /* Remove neighbor entry with RTM_DELNEIGH */ -int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct net_bridge_port *p; int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != 
sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", @@ -668,9 +795,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, return -EINVAL; } - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr(p, addr); - spin_unlock_bh(&p->br->hash_lock); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_DELNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, delete the entry for every + * vlan on this port. 
+ */ + err = -ENOENT; + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err &= __br_fdb_delete(p, addr, vid); + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } + } +out: return err; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 02015a5..092b20e 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -31,6 +31,7 @@ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && + br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && p->state == BR_STATE_FORWARDING); } @@ -63,6 +64,10 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + skb->dev = to->dev; if (unlikely(netpoll_tx_running(to->br->dev))) { @@ -88,6 +93,10 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) return; } + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + indev = skb->dev; skb->dev = to->dev; skb_forward_csum(skb); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 1c8fdc3..459dab2 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -23,6 +23,7 @@ #include <linux/if_ether.h> #include <linux/slab.h> #include <net/sock.h> +#include <linux/if_vlan.h> #include "br_private.h" @@ -66,14 +67,15 @@ void br_port_carrier_check(struct net_bridge_port *p) struct net_device *dev = p->dev; struct net_bridge *br = p->br; - if (netif_running(dev) && netif_carrier_ok(dev)) + if (!(p->flags & BR_ADMIN_COST) && + netif_running(dev) && netif_oper_up(dev)) p->path_cost = port_cost(dev); if (!netif_running(br->dev)) return; spin_lock_bh(&br->lock); - if (netif_running(dev) && netif_carrier_ok(dev)) { + if (netif_running(dev) && netif_oper_up(dev)) { if (p->state == 
BR_STATE_DISABLED) br_stp_enable_port(p); } else { @@ -139,6 +141,7 @@ static void del_nbp(struct net_bridge_port *p) br_ifinfo_notify(RTM_DELLINK, p); + nbp_vlan_flush(p); br_fdb_delete_by_port(br, p, 1); list_del_rcu(&p->list); @@ -148,7 +151,7 @@ static void del_nbp(struct net_bridge_port *p) netdev_rx_handler_unregister(dev); synchronize_net(); - netdev_set_master(dev, NULL); + netdev_upper_dev_unlink(dev, br->dev); br_multicast_del_port(p); @@ -364,13 +367,13 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) goto err3; - err = netdev_set_master(dev, br->dev); + err = netdev_master_upper_dev_link(dev, br->dev); if (err) - goto err3; + goto err4; err = netdev_rx_handler_register(dev, br_handle_frame, p); if (err) - goto err4; + goto err5; dev->priv_flags |= IFF_BRIDGE_PORT; @@ -383,7 +386,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); - if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) && + if (netif_running(dev) && netif_oper_up(dev) && (br->dev->flags & IFF_UP)) br_stp_enable_port(p); spin_unlock_bh(&br->lock); @@ -395,15 +398,17 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev_set_mtu(br->dev, br_min_mtu(br)); - if (br_fdb_insert(br, p, dev->dev_addr)) + if (br_fdb_insert(br, p, dev->dev_addr, 0)) netdev_err(dev, "failed insert local address bridge forwarding table\n"); kobject_uevent(&p->kobj, KOBJ_ADD); return 0; +err5: + netdev_upper_dev_unlink(dev, br->dev); err4: - netdev_set_master(dev, NULL); + br_netpoll_disable(p); err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 4b34207..828e2bc 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -17,6 +17,7 @@ #include <linux/etherdevice.h> #include <linux/netfilter_bridge.h> #include <linux/export.h> +#include <linux/rculist.h> 
#include "br_private.h" /* Hook for brouter */ @@ -34,6 +35,20 @@ static int br_pass_frame_up(struct sk_buff *skb) brstats->rx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + /* Bridge is just like any other port. Make sure the + * packet is allowed except in promisc mode when someone + * may be running packet capture. + */ + if (!(brdev->flags & IFF_PROMISC) && + !br_allowed_egress(br, br_get_vlan_info(br), skb)) { + kfree_skb(skb); + return NET_RX_DROP; + } + + skb = br_handle_vlan(br, br_get_vlan_info(br), skb); + if (!skb) + return NET_RX_DROP; + indev = skb->dev; skb->dev = brdev; @@ -50,13 +65,17 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; + u16 vid = 0; if (!p || p->state == BR_STATE_DISABLED) goto drop; + if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) + goto drop; + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; - br_fdb_update(br, p, eth_hdr(skb)->h_source); + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) @@ -78,7 +97,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (is_broadcast_ether_addr(dest)) skb2 = skb; else if (is_multicast_ether_addr(dest)) { - mdst = br_mdb_get(br, skb); + mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) @@ -91,7 +110,8 @@ int br_handle_frame_finish(struct sk_buff *skb) skb2 = skb; br->dev->stats.multicast++; - } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { + } else if ((dst = __br_fdb_get(br, dest, vid)) && + dst->is_local) { skb2 = skb; /* Do not forward the packet since it's local. 
*/ skb = NULL; @@ -119,8 +139,10 @@ drop: static int br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); + u16 vid = 0; - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_vlan_get_tag(skb, &vid); + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index acc9f4c..ee79f3f 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -18,7 +18,6 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p; - struct hlist_node *n; struct nlattr *nest; if (!br->multicast_router || hlist_empty(&br->router_list)) @@ -28,7 +27,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (nest == NULL) return -EMSGSIZE; - hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) { + hlist_for_each_entry_rcu(p, &br->router_list, rlist) { if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) goto fail; } @@ -61,12 +60,11 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, return -EMSGSIZE; for (i = 0; i < mdb->max; i++) { - struct hlist_node *h; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p, **pp; struct net_bridge_port *port; - hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) { if (idx < s_idx) goto skip; @@ -82,6 +80,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, port = p->port; if (port) { struct br_mdb_entry e; + memset(&e, 0, sizeof(e)); e.ifindex = port->dev->ifindex; e.state = p->state; if (p->addr.proto == htons(ETH_P_IP)) @@ -138,6 +137,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) break; bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); bpm->ifindex = dev->ifindex; if (br_mdb_fill_info(skb, cb, 
dev) < 0) goto out; @@ -173,6 +173,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, return -EMSGSIZE; bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; nest = nla_nest_start(skb, MDBA_MDB); @@ -230,6 +231,7 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, { struct br_mdb_entry entry; + memset(&entry, 0, sizeof(entry)); entry.ifindex = port->dev->ifindex; entry.addr.proto = group->proto; entry.addr.u.ip4 = group->u.ip4; @@ -272,9 +274,6 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); if (err < 0) return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 5391ca4..923fbea 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -39,6 +39,8 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) { if (a->proto != b->proto) return 0; + if (a->vid != b->vid) + return 0; switch (a->proto) { case htons(ETH_P_IP): return a->u.ip4 == b->u.ip4; @@ -50,16 +52,19 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) return 0; } -static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) +static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip, + __u16 vid) { - return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); + return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1); } #if IS_ENABLED(CONFIG_IPV6) static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, - const struct in6_addr *ip) + const struct in6_addr *ip, + __u16 vid) { - return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1); + return jhash_2words(ipv6_addr_hash(ip), vid, + mdb->secret) & (mdb->max - 1); } #endif @@ -68,10 +73,10 @@ static inline int br_ip_hash(struct 
net_bridge_mdb_htable *mdb, { switch (ip->proto) { case htons(ETH_P_IP): - return __br_ip4_hash(mdb, ip->u.ip4); + return __br_ip4_hash(mdb, ip->u.ip4, ip->vid); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return __br_ip6_hash(mdb, &ip->u.ip6); + return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid); #endif } return 0; @@ -81,9 +86,8 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash) { struct net_bridge_mdb_entry *mp; - struct hlist_node *p; - hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, &mdb->mhash[hash], hlist[mdb->ver]) { if (br_ip_equal(&mp->addr, dst)) return mp; } @@ -101,31 +105,34 @@ struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, } static struct net_bridge_mdb_entry *br_mdb_ip4_get( - struct net_bridge_mdb_htable *mdb, __be32 dst) + struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid) { struct br_ip br_dst; br_dst.u.ip4 = dst; br_dst.proto = htons(ETH_P_IP); + br_dst.vid = vid; return br_mdb_ip_get(mdb, &br_dst); } #if IS_ENABLED(CONFIG_IPV6) static struct net_bridge_mdb_entry *br_mdb_ip6_get( - struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst) + struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst, + __u16 vid) { struct br_ip br_dst; br_dst.u.ip6 = *dst; br_dst.proto = htons(ETH_P_IPV6); + br_dst.vid = vid; return br_mdb_ip_get(mdb, &br_dst); } #endif struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb); struct br_ip ip; @@ -137,6 +144,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, return NULL; ip.proto = skb->protocol; + ip.vid = vid; switch (skb->protocol) { case htons(ETH_P_IP): @@ -170,13 +178,12 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, int elasticity) { struct net_bridge_mdb_entry *mp; - struct 
hlist_node *p; int maxlen; int len; int i; for (i = 0; i < old->max; i++) - hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) + hlist_for_each_entry(mp, &old->mhash[i], hlist[old->ver]) hlist_add_head(&mp->hlist[new->ver], &new->mhash[br_ip_hash(new, &mp->addr)]); @@ -186,7 +193,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, maxlen = 0; for (i = 0; i < new->max; i++) { len = 0; - hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver]) + hlist_for_each_entry(mp, &new->mhash[i], hlist[new->ver]) len++; if (len > maxlen) maxlen = len; @@ -502,14 +509,13 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; - struct hlist_node *p; unsigned int count = 0; unsigned int max; int elasticity; int err; mdb = rcu_dereference_protected(br->mdb, 1); - hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry(mp, &mdb->mhash[hash], hlist[mdb->ver]) { count++; if (unlikely(br_ip_equal(group, &mp->addr))) return mp; @@ -694,7 +700,8 @@ err: static int br_ip4_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + __be32 group, + __u16 vid) { struct br_ip br_group; @@ -703,6 +710,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; return br_multicast_add_group(br, port, &br_group); } @@ -710,7 +718,8 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, - const struct in6_addr *group) + const struct in6_addr *group, + __u16 vid) { struct br_ip br_group; @@ -719,6 +728,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; return br_multicast_add_group(br, port, &br_group); } @@ -870,10 +880,10 @@ 
void br_multicast_disable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; - struct hlist_node *p, *n; + struct hlist_node *n; spin_lock(&br->multicast_lock); - hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist) + hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_del_pg(br, pg); if (!hlist_unhashed(&port->rlist)) @@ -895,10 +905,12 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int type; int err = 0; __be32 group; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ih))) return -EINVAL; + br_vlan_get_tag(skb, &vid); ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -930,7 +942,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, continue; } - err = br_ip4_multicast_add_group(br, port, group); + err = br_ip4_multicast_add_group(br, port, group, vid); if (err) break; } @@ -949,10 +961,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, int len; int num; int err = 0; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; + br_vlan_get_tag(skb, &vid); icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); len = sizeof(*icmp6h); @@ -990,7 +1004,8 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, continue; } - err = br_ip6_multicast_add_group(br, port, &grec->grec_mca); + err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, + vid); if (!err) break; } @@ -1008,12 +1023,12 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port) { struct net_bridge_port *p; - struct hlist_node *n, *slot = NULL; + struct hlist_node *slot = NULL; - hlist_for_each_entry(p, n, &br->router_list, rlist) { + hlist_for_each_entry(p, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) break; - slot = n; + slot = &p->rlist; } if (slot) @@ -1074,6 +1089,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, unsigned long now = 
jiffies; __be32 group; int err = 0; + u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1108,7 +1124,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group); + br_vlan_get_tag(skb, &vid); + mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1149,6 +1166,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long now = jiffies; const struct in6_addr *group = NULL; int err = 0; + u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1180,7 +1198,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group); + br_vlan_get_tag(skb, &vid); + mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1286,7 +1305,8 @@ out: static void br_ip4_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + __be32 group, + __u16 vid) { struct br_ip br_group; @@ -1295,6 +1315,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; br_multicast_leave_group(br, port, &br_group); } @@ -1302,7 +1323,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - const struct in6_addr *group) + const struct in6_addr *group, + __u16 vid) { struct br_ip br_group; @@ -1311,6 +1333,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; br_multicast_leave_group(br, port, &br_group); } @@ -1326,6 +1349,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned int len; unsigned int offset; int err; + u16 vid = 0; /* We treat OOM 
as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) @@ -1386,6 +1410,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = 0; + br_vlan_get_tag(skb2, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb2); @@ -1393,7 +1418,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip4_multicast_add_group(br, port, ih->group); + err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: err = br_ip4_multicast_igmp3_report(br, port, skb2); @@ -1402,7 +1427,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_query(br, port, skb2); break; case IGMP_HOST_LEAVE_MESSAGE: - br_ip4_multicast_leave_group(br, port, ih->group); + br_ip4_multicast_leave_group(br, port, ih->group, vid); break; } @@ -1427,6 +1452,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, unsigned int len; int offset; int err; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; @@ -1510,6 +1536,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = 0; + br_vlan_get_tag(skb, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; switch (icmp6_type) { @@ -1522,7 +1549,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, } mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); break; } case ICMPV6_MLD2_REPORT: @@ -1539,7 +1566,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); - br_ip6_multicast_leave_group(br, port, &mld->mld_mca); + br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); } } @@ -1608,7 +1635,6 @@ void br_multicast_init(struct net_bridge *br) br_multicast_querier_expired, (unsigned long)br); 
setup_timer(&br->multicast_query_timer, br_multicast_query_expired, (unsigned long)br); - br_mdb_init(); } void br_multicast_open(struct net_bridge *br) @@ -1625,7 +1651,7 @@ void br_multicast_stop(struct net_bridge *br) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; - struct hlist_node *p, *n; + struct hlist_node *n; u32 ver; int i; @@ -1633,7 +1659,6 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->multicast_querier_timer); del_timer_sync(&br->multicast_query_timer); - br_mdb_uninit(); spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); if (!mdb) @@ -1643,7 +1668,7 @@ void br_multicast_stop(struct net_bridge *br) ver = mdb->ver; for (i = 0; i < mdb->max; i++) { - hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i], + hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); call_rcu_bh(&mp->rcu, br_multicast_free_group); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 97ba018..299fc5f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <uapi/linux/if_bridge.h> #include "br_private.h" #include "br_private_stp.h" @@ -28,6 +29,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_MODE */ + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + 0; } @@ -64,15 +66,21 @@ static int br_port_fill_attrs(struct sk_buff *skb, * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. 
*/ -static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, - u32 pid, u32 seq, int event, unsigned int flags) +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br = port->br; - const struct net_device *dev = port->dev; + const struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + if (port) + br = port->br; + else + br = netdev_priv(dev); + br_debug(br, "br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); @@ -98,7 +106,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; - if (event == RTM_NEWLINK) { + if (event == RTM_NEWLINK && port) { struct nlattr *nest = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -107,6 +115,48 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_nest_end(skb, nest); } + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + u16 pvid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + pvid = br_get_pvid(pv); + for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1)) { + vinfo.vid = vid; + vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, 
IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: return nlmsg_end(skb, nlh); nla_put_failure: @@ -119,10 +169,14 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { - struct net *net = dev_net(port->dev); + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + if (!port) + return; + + net = dev_net(port->dev); br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); @@ -130,7 +184,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -144,24 +198,85 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } + /* * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, u32 filter_mask) { int err = 0; struct net_bridge_port *port = br_port_get_rcu(dev); - /* not a bridge port */ - if (!port) + /* not a bridge port and */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); out: return err; } +static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return 
err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (vinfo->vid >= VLAN_N_VID) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, @@ -181,8 +296,11 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state) if (p->br->stp_enabled == BR_KERNEL_STP) return -EBUSY; + /* if device is not up, change is not allowed + * if link is not present, only allowable state is disabled + */ if (!netif_running(p->dev) || - (!netif_carrier_ok(p->dev) && state != BR_STATE_DISABLED)) + (!netif_oper_up(p->dev) && state != BR_STATE_DISABLED)) return -ENETDOWN; p->state = state; @@ -212,6 +330,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); @@ -238,6 +357,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; + struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; int err; 
@@ -245,38 +365,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo) + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!protinfo && !afspec) return 0; p = br_port_get_rtnl(dev); - if (!p) + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the brigde + */ + if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) return -EINVAL; - if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } if (err) - return err; - - spin_lock_bh(&p->br->lock); - err = br_setport(p, tb); - spin_unlock_bh(&p->br->lock); - } else { - /* Binary compatability with old RSTP */ - if (nla_len(protinfo) < sizeof(u8)) - return -EINVAL; + goto out; + } - spin_lock_bh(&p->br->lock); - err = br_set_port_state(p, nla_get_u8(protinfo)); - spin_unlock_bh(&p->br->lock); + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); } if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); +out: return err; } +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifm; + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + ifm = nlmsg_data(nlh); + + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* 
We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} static int br_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { @@ -289,6 +447,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +static struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; + struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), @@ -299,10 +480,29 @@ struct rtnl_link_ops br_link_ops __read_mostly = { int __init br_netlink_init(void) { - return rtnl_link_register(&br_link_ops); + int err; + + br_mdb_init(); + err = rtnl_af_register(&br_af_ops); + if (err) + goto out; + + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); +out: + br_mdb_uninit(); + return err; } void __exit br_netlink_fini(void) { + br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); } diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index a76b621..1644b3e 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -82,7 +82,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; case NETDEV_UP: - if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) { + if 
(netif_running(br->dev) && netif_oper_up(dev)) { spin_lock_bh(&br->lock); br_stp_enable_port(p); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8d83be5..d2c043a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -18,6 +18,7 @@ #include <linux/netpoll.h> #include <linux/u64_stats_sync.h> #include <net/route.h> +#include <linux/if_vlan.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -26,6 +27,7 @@ #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<<BR_PORT_BITS) +#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID) #define BR_VERSION "2.3" @@ -61,6 +63,20 @@ struct br_ip #endif } u; __be16 proto; + __u16 vid; +}; + +struct net_port_vlans { + u16 port_idx; + u16 pvid; + union { + struct net_bridge_port *port; + struct net_bridge *br; + } parent; + struct rcu_head rcu; + unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -74,6 +90,7 @@ struct net_bridge_fdb_entry mac_addr addr; unsigned char is_local; unsigned char is_static; + __u16 vlan_id; }; struct net_bridge_port_group { @@ -139,6 +156,7 @@ struct net_bridge_port #define BR_BPDU_GUARD 0x00000002 #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 +#define BR_ADMIN_COST 0x00000010 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; @@ -156,6 +174,9 @@ struct net_bridge_port #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_port_vlans __rcu *vlan_info; +#endif }; #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) @@ -197,9 +218,6 @@ struct net_bridge bool nf_call_ip6tables; bool nf_call_arptables; #endif - unsigned long flags; -#define BR_SET_MAC_ADDR 0x00000001 - u16 group_fwd_mask; /* STP */ @@ -260,6 +278,10 @@ struct net_bridge struct timer_list topology_change_timer; struct timer_list gc_timer; struct kobject 
*ifobj; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_enabled; + struct net_port_vlans __rcu *vlan_info; +#endif }; struct br_input_skb_cb { @@ -355,18 +377,22 @@ extern void br_fdb_cleanup(unsigned long arg); extern void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, int do_all); extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr); + const unsigned char *addr, + __u16 vid); extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); extern int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, unsigned long off); extern int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, + u16 vid); extern void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, + u16 vid); +extern int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid); -extern int br_fdb_delete(struct ndmsg *ndm, +extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], @@ -417,7 +443,7 @@ extern int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb); extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb); + struct sk_buff *skb, u16 vid); extern void br_multicast_add_port(struct net_bridge_port *port); extern void br_multicast_del_port(struct net_bridge_port *port); extern void br_multicast_enable_port(struct net_bridge_port *port); @@ -479,7 +505,7 @@ static inline int br_multicast_rcv(struct net_bridge *br, } static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { return NULL; } @@ -526,6 +552,148 @@ static inline bool br_multicast_is_router(struct 
net_bridge *br) { return 0; } +static inline void br_mdb_init(void) +{ +} +static inline void br_mdb_uninit(void) +{ +} +#endif + +/* br_vlan.c */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid); +extern bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb); +extern struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb); +extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); +extern int br_vlan_delete(struct net_bridge *br, u16 vid); +extern void br_vlan_flush(struct net_bridge *br); +extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); +extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); +extern void nbp_vlan_flush(struct net_bridge_port *port); +extern bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return rcu_dereference_rtnl(br->vlan_info); +} + +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return rcu_dereference_rtnl(p->vlan_info); +} + +/* Since bridge now depends on 8021Q module, but the time bridge sees the + * skb, the vlan tag will always be present if the frame was tagged. + */ +static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) +{ + int err = 0; + + if (vlan_tx_tag_present(skb)) + *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + else { + *vid = 0; + err = -EINVAL; + } + + return err; +} + +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + /* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if + * vid wasn't set + */ + smp_rmb(); + return (v->pvid & VLAN_TAG_PRESENT) ? 
+ (v->pvid & ~VLAN_TAG_PRESENT) : + VLAN_N_VID; +} + +#else +static inline bool br_allowed_ingress(struct net_bridge *br, + struct net_port_vlans *v, + struct sk_buff *skb, + u16 *vid) +{ + return true; +} + +static inline bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + return true; +} + +static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb) +{ + return skb; +} + +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void br_vlan_flush(struct net_bridge *br) +{ +} + +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void nbp_vlan_flush(struct net_bridge_port *port) +{ +} + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return NULL; +} +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return NULL; +} + +static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + return false; +} + +static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) +{ + return 0; +} +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + return VLAN_N_VID; /* Returns invalid vid */ +} #endif /* br_netfilter.c */ @@ -588,8 +756,9 @@ extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device 
*dev); + struct net_device *dev, u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 7f884e3..8660ea3 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -16,6 +16,7 @@ #include <linux/etherdevice.h> #include <linux/llc.h> #include <linux/slab.h> +#include <linux/pkt_sched.h> #include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> @@ -40,6 +41,7 @@ static void br_send_bpdu(struct net_bridge_port *p, skb->dev = p->dev; skb->protocol = htons(ETH_P_802_2); + skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, LLC_RESERVE); memcpy(__skb_put(skb, length), data, length); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 9d5a414..d45e760 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -54,7 +54,7 @@ void br_stp_enable_bridge(struct net_bridge *br) br_config_bpdu_generation(br); list_for_each_entry(p, &br->port_list, list) { - if ((p->dev->flags & IFF_UP) && netif_carrier_ok(p->dev)) + if (netif_running(p->dev) && netif_oper_up(p->dev)) br_stp_enable_port(p); } @@ -216,7 +216,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) struct net_bridge_port *p; /* user has chosen a value so keep it */ - if (br->flags & BR_SET_MAC_ADDR) + if (br->dev->addr_assign_type == NET_ADDR_SET) return false; list_for_each_entry(p, &br->port_list, list) { @@ -288,6 +288,7 @@ int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) path_cost > BR_MAX_PATH_COST) return -ERANGE; + p->flags |= BR_ADMIN_COST; p->path_cost = path_cost; br_configuration_update(p->br); br_port_state_selection(p->br); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 5913a3a..8baa9c0 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -691,6 +691,24 @@ static ssize_t store_nf_call_arptables( static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, show_nf_call_arptables, store_nf_call_arptables); 
#endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +static ssize_t show_vlan_filtering(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->vlan_enabled); +} + +static ssize_t store_vlan_filtering(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); +} +static DEVICE_ATTR(vlan_filtering, S_IRUGO | S_IWUSR, + show_vlan_filtering, store_vlan_filtering); +#endif static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, @@ -732,6 +750,9 @@ static struct attribute *bridge_attrs[] = { &dev_attr_nf_call_ip6tables.attr, &dev_attr_nf_call_arptables.attr, #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + &dev_attr_vlan_filtering.attr, +#endif NULL }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c new file mode 100644 index 0000000..93dde75 --- /dev/null +++ b/net/bridge/br_vlan.c @@ -0,0 +1,415 @@ +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> + +#include "br_private.h" + +static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid == vid) + return; + + smp_wmb(); + v->pvid = vid; +} + +static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid != vid) + return; + + smp_wmb(); + v->pvid = 0; +} + +static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +{ + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + set_bit(vid, v->untagged_bitmap); +} + +static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) +{ + struct net_bridge_port *p = NULL; + struct net_bridge *br; + struct net_device *dev; + int err; + + if (test_bit(vid, v->vlan_bitmap)) { + __vlan_add_flags(v, vid, flags); + return 0; + } + + if (vid) { + if (v->port_idx) { + p = v->parent.port; + br = p->br; + dev = p->dev; + } else { 
+ br = v->parent.br; + dev = br->dev; + } + + if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) { + /* Add VLAN to the device filter if it is supported. + * Stricly speaking, this is not necessary now, since + * devices are made promiscuous by the bridge, but if + * that ever changes this code will allow tagged + * traffic to enter the bridge. + */ + err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid); + if (err) + return err; + } + + err = br_fdb_insert(br, p, dev->dev_addr, vid); + if (err) { + br_err(br, "failed insert local address into bridge " + "forwarding table\n"); + goto out_filt; + } + + } + + set_bit(vid, v->vlan_bitmap); + v->num_vlans++; + __vlan_add_flags(v, vid, flags); + + return 0; + +out_filt: + if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) + dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + return err; +} + +static int __vlan_del(struct net_port_vlans *v, u16 vid) +{ + if (!test_bit(vid, v->vlan_bitmap)) + return -EINVAL; + + __vlan_delete_pvid(v, vid); + clear_bit(vid, v->untagged_bitmap); + + if (v->port_idx && vid) { + struct net_device *dev = v->parent.port->dev; + + if (dev->features & NETIF_F_HW_VLAN_FILTER) + dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + } + + clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; + if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (v->port_idx) + rcu_assign_pointer(v->parent.port->vlan_info, NULL); + else + rcu_assign_pointer(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); + } + return 0; +} + +static void __vlan_flush(struct net_port_vlans *v) +{ + smp_wmb(); + v->pvid = 0; + bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); + if (v->port_idx) + rcu_assign_pointer(v->parent.port->vlan_info, NULL); + else + rcu_assign_pointer(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); +} + +/* Strip the tag from the packet. Will return skb with tci set 0. 
*/ +static struct sk_buff *br_vlan_untag(struct sk_buff *skb) +{ + if (skb->protocol != htons(ETH_P_8021Q)) { + skb->vlan_tci = 0; + return skb; + } + + skb->vlan_tci = 0; + skb = vlan_untag(skb); + if (skb) + skb->vlan_tci = 0; + + return skb; +} + +struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *pv, + struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + goto out; + + /* At this point, we know that the frame was filtered and contains + * a valid vlan id. If the vlan id is set in the untagged bitmap, + * send untagged; otherwise, send taged. + */ + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, pv->untagged_bitmap)) + skb = br_vlan_untag(skb); + else { + /* Egress policy says "send tagged". If output device + * is the bridge, we need to add the VLAN header + * ourselves since we'll be going through the RX path. + * Sending to ports puts the frame on the TX path and + * we let dev_hard_start_xmit() add the header. + */ + if (skb->protocol != htons(ETH_P_8021Q) && + pv->port_idx == 0) { + /* vlan_put_tag expects skb->data to point to + * mac header. + */ + skb_push(skb, ETH_HLEN); + skb = __vlan_put_tag(skb, skb->vlan_tci); + if (!skb) + goto out; + /* put skb->data back to where it was */ + skb_pull(skb, ETH_HLEN); + skb->vlan_tci = 0; + } + } + +out: + return skb; +} + +/* Called under RCU */ +bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid) +{ + /* If VLAN filtering is disabled on the bridge, all packets are + * permitted. + */ + if (!br->vlan_enabled) + return true; + + /* If there are no vlan in the permitted list, all packets are + * rejected. + */ + if (!v) + return false; + + if (br_vlan_get_tag(skb, vid)) { + u16 pvid = br_get_pvid(v); + + /* Frame did not have a tag. See if pvid is set + * on this port. That tells us which vlan untagged + * traffic belongs to. + */ + if (pvid == VLAN_N_VID) + return false; + + /* PVID is set on this port. 
Any untagged ingress + * frame is considered to belong to this vlan. + */ + __vlan_hwaccel_put_tag(skb, pvid); + return true; + } + + /* Frame had a valid vlan tag. See if vlan is allowed */ + if (test_bit(*vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Called under RCU. */ +bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + return true; + + if (!v) + return false; + + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Must be protected by RTNL */ +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan infomration + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) + return -ENOMEM; + + pv->parent.br = br; + err = __vlan_add(pv, vid, flags); + if (err) + goto out; + + rcu_assign_pointer(br->vlan_info, pv); + return 0; +out: + kfree(pv); + return err; +} + +/* Must be protected by RTNL */ +int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return -EINVAL; + + if (vid) { + /* If the VID !=0 remove fdb for this vid. VID 0 is special + * in that it's the default and is always there in the fdb. 
+ */ + spin_lock_bh(&br->hash_lock); + fdb_delete_by_addr(br, br->dev->dev_addr, vid); + spin_unlock_bh(&br->hash_lock); + } + + __vlan_del(pv, vid); + return 0; +} + +void br_vlan_flush(struct net_bridge *br) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return; + + __vlan_flush(pv); +} + +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) +{ + if (!rtnl_trylock()) + return restart_syscall(); + + if (br->vlan_enabled == val) + goto unlock; + + br->vlan_enabled = val; + +unlock: + rtnl_unlock(); + return 0; +} + +/* Must be protected by RTNL */ +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan infomration + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) { + err = -ENOMEM; + goto clean_up; + } + + pv->port_idx = port->port_no; + pv->parent.port = port; + err = __vlan_add(pv, vid, flags); + if (err) + goto clean_up; + + rcu_assign_pointer(port->vlan_info, pv); + return 0; + +clean_up: + kfree(pv); + return err; +} + +/* Must be protected by RTNL */ +int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return -EINVAL; + + if (vid) { + /* If the VID !=0 remove fdb for this vid. VID 0 is special + * in that it's the default and is always there in the fdb. 
+ */ + spin_lock_bh(&port->br->hash_lock); + fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); + spin_unlock_bh(&port->br->hash_lock); + } + + return __vlan_del(pv, vid); +} + +void nbp_vlan_flush(struct net_bridge_port *port) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return; + + __vlan_flush(pv); +} + +bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + bool found = false; + + rcu_read_lock(); + pv = rcu_dereference(port->vlan_info); + + if (!pv) + goto out; + + if (test_bit(vid, pv->vlan_bitmap)) + found = true; + +out: + rcu_read_unlock(); + return found; +} diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 3476ec4..3bf43f7 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -70,8 +70,7 @@ static void ulog_send(unsigned int nlgroup) { ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); + del_timer(&ub->timer); if (!ub->skb) return; @@ -319,8 +318,7 @@ static void __exit ebt_ulog_fini(void) xt_unregister_target(&ebt_ulog_tg_reg); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); + del_timer(&ub->timer); spin_lock_bh(&ub->lock); if (ub->skb) { kfree_skb(ub->skb); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5fe2ff3..8d493c9 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1472,16 +1472,17 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; switch(cmd) { case EBT_SO_SET_ENTRIES: - ret = do_replace(sock_net(sk), user, len); + ret = do_replace(net, user, len); break; case EBT_SO_SET_COUNTERS: - ret = 
update_counters(sock_net(sk), user, len); + ret = update_counters(net, user, len); break; default: ret = -EINVAL; @@ -1494,14 +1495,15 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int ret; struct ebt_replace tmp; struct ebt_table *t; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; @@ -2279,16 +2281,17 @@ static int compat_do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case EBT_SO_SET_ENTRIES: - ret = compat_do_replace(sock_net(sk), user, len); + ret = compat_do_replace(net, user, len); break; case EBT_SO_SET_COUNTERS: - ret = compat_update_counters(sock_net(sk), user, len); + ret = compat_update_counters(net, user, len); break; default: ret = -EINVAL; @@ -2302,8 +2305,9 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, int ret; struct compat_ebt_replace tmp; struct ebt_table *t; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* try real handler in case userland supplied needed padding */ @@ -2314,7 +2318,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 1ae1d9c..21760f0 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -118,7 +118,7 @@ static struct caif_device_entry *caif_get(struct net_device 
*dev) return NULL; } -void caif_flow_cb(struct sk_buff *skb) +static void caif_flow_cb(struct sk_buff *skb) { struct caif_device_entry *caifd; void (*dtor)(struct sk_buff *skb) = NULL; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 095259f..ff2ff3c 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -286,6 +286,8 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; + m->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 3ebc8cb..ef8ebaa 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -81,8 +81,8 @@ static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, layr->up->ctrlcmd(layr->up, ctrl, layr->id); } -struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], - u8 braddr[ETH_ALEN]) +static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], + u8 braddr[ETH_ALEN]) { struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC); diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index ba9cfd4..f1dbddb 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -402,7 +402,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid); if (phyinfo == NULL) { - pr_err("ERROR: Link Layer Device dissapeared" + pr_err("ERROR: Link Layer Device disappeared" "while connecting\n"); goto unlock; } diff --git a/net/can/Kconfig b/net/can/Kconfig index 0320069..a15c0e0 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -16,10 +16,11 @@ menuconfig CAN If you want CAN support you should say Y here and also to the specific driver for your controller(s) below. 
+if CAN + config CAN_RAW tristate "Raw CAN Protocol (raw access with CAN-ID filtering)" - depends on CAN - default N + default y ---help--- The raw CAN protocol option offers access to the CAN bus via the BSD socket API. You probably want to use the raw socket in @@ -29,8 +30,7 @@ config CAN_RAW config CAN_BCM tristate "Broadcast Manager CAN Protocol (with content filtering)" - depends on CAN - default N + default y ---help--- The Broadcast Manager offers content filtering, timeout monitoring, sending of RTR frames, and cyclic CAN messages without permanent user @@ -42,8 +42,7 @@ config CAN_BCM config CAN_GW tristate "CAN Gateway/Router (with netlink configuration)" - depends on CAN - default N + default y ---help--- The CAN Gateway/Router is used to route (and modify) CAN frames. It is based on the PF_CAN core infrastructure for msg filtering and @@ -53,3 +52,5 @@ config CAN_GW by the netlink configuration interface known e.g. from iptables. source "drivers/net/can/Kconfig" + +endif diff --git a/net/can/af_can.c b/net/can/af_can.c index ddac1ee..c48e522 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -516,7 +516,6 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, { struct receiver *r = NULL; struct hlist_head *rl; - struct hlist_node *next; struct dev_rcv_lists *d; if (dev && dev->type != ARPHRD_CAN) @@ -540,7 +539,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, * been registered before. */ - hlist_for_each_entry_rcu(r, next, rl, list) { + hlist_for_each_entry_rcu(r, rl, list) { if (r->can_id == can_id && r->mask == mask && r->func == func && r->data == data) break; @@ -552,7 +551,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, * will be NULL, while r will point to the last item of the list. 
*/ - if (!next) { + if (!r) { printk(KERN_ERR "BUG: receive list entry not found for " "dev %s, id %03X, mask %03X\n", DNAME(dev), can_id, mask); @@ -590,7 +589,6 @@ static inline void deliver(struct sk_buff *skb, struct receiver *r) static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) { struct receiver *r; - struct hlist_node *n; int matches = 0; struct can_frame *cf = (struct can_frame *)skb->data; canid_t can_id = cf->can_id; @@ -600,7 +598,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) if (can_id & CAN_ERR_FLAG) { /* check for error message frame entries only */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_ERR], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) { if (can_id & r->mask) { deliver(skb, r); matches++; @@ -610,13 +608,13 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } /* check for unfiltered entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_ALL], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) { deliver(skb, r); matches++; } /* check for can_id/mask entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_FIL], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) { if ((can_id & r->mask) == r->can_id) { deliver(skb, r); matches++; @@ -624,7 +622,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } /* check for inverted can_id/mask entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_INV], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) { if ((can_id & r->mask) != r->can_id) { deliver(skb, r); matches++; @@ -636,7 +634,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) return matches; if (can_id & CAN_EFF_FLAG) { - hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_EFF], list) { if (r->can_id == can_id) { deliver(skb, r); matches++; @@ -644,7 +642,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } } else { 
can_id &= CAN_SFF_MASK; - hlist_for_each_entry_rcu(r, n, &d->rx_sff[can_id], list) { + hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) { deliver(skb, r); matches++; } diff --git a/net/can/bcm.c b/net/can/bcm.c index 969b7cd..5dcb200 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -54,6 +54,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/can/bcm.h> #include <linux/slab.h> #include <net/sock.h> @@ -256,10 +257,13 @@ static void bcm_can_tx(struct bcm_op *op) return; } - skb = alloc_skb(CFSIZ, gfp_any()); + skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), gfp_any()); if (!skb) goto out; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; + memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); /* send with loopback */ @@ -1199,11 +1203,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) if (!ifindex) return -ENODEV; - skb = alloc_skb(CFSIZ, GFP_KERNEL); - + skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), GFP_KERNEL); if (!skb) return -ENOMEM; + can_skb_reserve(skb); + err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ); if (err < 0) { kfree_skb(skb); @@ -1216,6 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) return -ENODEV; } + can_skb_prv(skb)->ifindex = dev->ifindex; skb->dev = dev; skb->sk = sk; err = can_send(skb, 1); /* send with loopback */ @@ -1627,7 +1633,7 @@ static void __exit bcm_module_exit(void) can_proto_unregister(&bcm_can_proto); if (proc_dir) - proc_net_remove(&init_net, "can-bcm"); + remove_proc_entry("can-bcm", init_net.proc_net); } module_init(bcm_module_init); diff --git a/net/can/gw.c b/net/can/gw.c index 574dda78e..117814a 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -42,6 +42,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> +#include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> @@ -52,19 
+53,31 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/can/gw.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> -#define CAN_GW_VERSION "20101209" -static __initconst const char banner[] = - KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n"; +#define CAN_GW_VERSION "20130117" +#define CAN_GW_NAME "can-gw" MODULE_DESCRIPTION("PF_CAN netlink gateway"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); -MODULE_ALIAS("can-gw"); +MODULE_ALIAS(CAN_GW_NAME); + +#define CGW_MIN_HOPS 1 +#define CGW_MAX_HOPS 6 +#define CGW_DEFAULT_HOPS 1 + +static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS; +module_param(max_hops, uint, S_IRUGO); +MODULE_PARM_DESC(max_hops, + "maximum " CAN_GW_NAME " routing hops for CAN frames " + "(valid values: " __stringify(CGW_MIN_HOPS) "-" + __stringify(CGW_MAX_HOPS) " hops, " + "default: " __stringify(CGW_DEFAULT_HOPS) ")"); static HLIST_HEAD(cgw_list); static struct notifier_block notifier; @@ -118,6 +131,7 @@ struct cgw_job { struct rcu_head rcu; u32 handled_frames; u32 dropped_frames; + u32 deleted_frames; struct cf_mod mod; union { /* CAN frame data source */ @@ -338,15 +352,38 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct sk_buff *nskb; int modidx = 0; - /* do not handle already routed frames - see comment below */ - if (skb_mac_header_was_set(skb)) + /* + * Do not handle CAN frames routed more than 'max_hops' times. + * In general we should never catch this delimiter which is intended + * to cover a misconfiguration protection (e.g. circular CAN routes). + * + * The Controller Area Network controllers only accept CAN frames with + * correct CRCs - which are not visible in the controller registers. + * According to skbuff.h documentation the csum_start element for IP + * checksums is undefined/unsued when ip_summed == CHECKSUM_UNNECESSARY. 
+ * Only CAN skbs can be processed here which already have this property. + */ + +#define cgw_hops(skb) ((skb)->csum_start) + + BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY); + + if (cgw_hops(skb) >= max_hops) { + /* indicate deleted frames due to misconfiguration */ + gwj->deleted_frames++; return; + } if (!(gwj->dst.dev->flags & IFF_UP)) { gwj->dropped_frames++; return; } + /* is sending the skb back to the incoming interface not allowed? */ + if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && + can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex) + return; + /* * clone the given skb, which has not been done in can_rcv() * @@ -363,15 +400,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) return; } - /* - * Mark routed frames by setting some mac header length which is - * not relevant for the CAN frames located in the skb->data section. - * - * As dev->header_ops is not set in CAN netdevices no one is ever - * accessing the various header offsets in the CAN skbuffs anyway. - * E.g. using the packet socket to read CAN frames is still working. 
- */ - skb_set_mac_header(nskb, 8); + /* put the incremented hop counter in the cloned skb */ + cgw_hops(nskb) = cgw_hops(skb) + 1; nskb->dev = gwj->dst.dev; /* pointer to modifiable CAN frame */ @@ -427,16 +457,16 @@ static int cgw_notifier(struct notifier_block *nb, if (msg == NETDEV_UNREGISTER) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } } @@ -472,6 +502,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + if (gwj->deleted_frames) { + if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0) + goto cancel; + } + /* check non default settings of attributes */ if (gwj->mod.modtype.and) { @@ -540,12 +575,11 @@ cancel: static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) { struct cgw_job *gwj = NULL; - struct hlist_node *n; int idx = 0; int s_idx = cb->args[0]; rcu_read_lock(); - hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) { + hlist_for_each_entry_rcu(gwj, &cgw_list, list) { if (idx < s_idx) goto cont; @@ -771,6 +805,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->handled_frames = 0; gwj->dropped_frames = 0; + gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; @@ -822,21 +857,21 @@ out: static void cgw_remove_all_jobs(void) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct 
cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; struct rtcanmsg *r; struct cf_mod mod; struct can_can_gw ccgw; @@ -871,7 +906,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ASSERT_RTNL(); /* remove only the first matching entry */ - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { if (gwj->flags != r->flags) continue; @@ -885,7 +920,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); err = 0; break; } @@ -895,7 +930,11 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) static __init int cgw_module_init(void) { - printk(banner); + /* sanitize given module parameter */ + max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS); + + pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n", + max_hops); cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), 0, 0, NULL); diff --git a/net/can/proc.c b/net/can/proc.c index ae56690..1ab8c88 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -195,9 +195,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, struct net_device *dev) { struct receiver *r; - struct hlist_node *n; - hlist_for_each_entry_rcu(r, n, rx_list, list) { + hlist_for_each_entry_rcu(r, rx_list, list) { char *fmt = (r->can_id & CAN_EFF_FLAG)? 
" %-5s %08x %08x %pK %pK %8ld %s\n" : " %-5s %03x %08x %pK %pK %8ld %s\n"; @@ -531,5 +530,5 @@ void can_remove_proc(void) can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF); if (can_dir) - proc_net_remove(&init_net, "can"); + remove_proc_entry("can", init_net.proc_net); } diff --git a/net/can/raw.c b/net/can/raw.c index 5b0e3e3..c1764e4 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -50,6 +50,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/can/raw.h> #include <net/sock.h> #include <net/net_namespace.h> @@ -699,11 +700,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, if (!dev) return -ENXIO; - skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, - &err); + skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), + msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto put_dev; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index cc04dd6..e50cc69 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -1,6 +1,6 @@ config CEPH_LIB - tristate "Ceph core library (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "Ceph core library" + depends on INET select LIBCRC32C select CRYPTO_AES select CRYPTO diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ee71ea2..e65e6e4 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -15,6 +15,8 @@ #include <linux/slab.h> #include <linux/statfs.h> #include <linux/string.h> +#include <linux/nsproxy.h> +#include <net/net_namespace.h> #include <linux/ceph/ceph_features.h> @@ -26,6 +28,22 @@ #include "crypto.h" +/* + * Module compatibility interface. For now it doesn't do anything, + * but its existence signals a certain level of functionality. 
+ * + * The data buffer is used to pass information both to and from + * libceph. The return value indicates whether libceph determines + * it is compatible with the caller (from another kernel module), + * given the provided data. + * + * The data pointer can be null. + */ +bool libceph_compatible(void *data) +{ + return true; +} +EXPORT_SYMBOL(libceph_compatible); /* * find filename portion of a path (/foo/bar/baz -> baz) @@ -292,6 +310,9 @@ ceph_parse_options(char *options, const char *dev_name, int err = -ENOMEM; substring_t argstr[MAX_OPT_ARGS]; + if (current->nsproxy->net_ns != &init_net) + return ERR_PTR(-EINVAL); + opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return ERR_PTR(-ENOMEM); @@ -585,10 +606,8 @@ static int __init init_ceph_lib(void) if (ret < 0) goto out_crypto; - pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", - CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, - CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, - CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); + pr_info("loaded (mon/osd proto %d/%d)\n", + CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL); return 0; diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 3fbda04..1348df9 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -21,9 +21,15 @@ const char *ceph_osd_op_name(int op) switch (op) { case CEPH_OSD_OP_READ: return "read"; case CEPH_OSD_OP_STAT: return "stat"; + case CEPH_OSD_OP_MAPEXT: return "mapext"; + case CEPH_OSD_OP_SPARSE_READ: return "sparse-read"; + case CEPH_OSD_OP_NOTIFY: return "notify"; + case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack"; + case CEPH_OSD_OP_ASSERT_VER: return "assert-version"; case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; + case CEPH_OSD_OP_CREATE: return "create"; case CEPH_OSD_OP_WRITE: return "write"; case CEPH_OSD_OP_DELETE: return "delete"; case CEPH_OSD_OP_TRUNCATE: return "truncate"; @@ -39,6 +45,11 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_TMAPUP: return "tmapup"; case CEPH_OSD_OP_TMAPGET: 
return "tmapget"; case CEPH_OSD_OP_TMAPPUT: return "tmapput"; + case CEPH_OSD_OP_WATCH: return "watch"; + + case CEPH_OSD_OP_CLONERANGE: return "clonerange"; + case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version"; + case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr"; case CEPH_OSD_OP_GETXATTR: return "getxattr"; case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; @@ -53,6 +64,10 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; case CEPH_OSD_OP_SCRUB: return "scrub"; + case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve"; + case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve"; + case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop"; + case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map"; case CEPH_OSD_OP_WRLOCK: return "wrlock"; case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; @@ -64,10 +79,34 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_CALL: return "call"; case CEPH_OSD_OP_PGLS: return "pgls"; + case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter"; + case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys"; + case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals"; + case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header"; + case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys"; + case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals"; + case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header"; + case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear"; + case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys"; } return "???"; } +const char *ceph_osd_state_name(int s) +{ + switch (s) { + case CEPH_OSD_EXISTS: + return "exists"; + case CEPH_OSD_UP: + return "up"; + case CEPH_OSD_AUTOOUT: + return "autoout"; + case CEPH_OSD_NEW: + return "new"; + default: + return "???"; + } +} const char *ceph_pool_op_name(int op) { diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 35fce75..cbd06a9 100644 --- a/net/ceph/crush/mapper.c +++ 
b/net/ceph/crush/mapper.c @@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in * @outpos: our position in that vector * @firstn: true if choosing "first n" items, false if choosing "indep" * @recurse_to_leaf: true if we want one device under each item of given type + * @descend_once: true if we should only try one descent before giving up * @out2: second output vector for leaf items (if @recurse_to_leaf) */ static int crush_choose(const struct crush_map *map, @@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map, int x, int numrep, int type, int *out, int outpos, int firstn, int recurse_to_leaf, - int *out2) + int descend_once, int *out2) { int rep; unsigned int ftotal, flocal; @@ -391,7 +392,7 @@ static int crush_choose(const struct crush_map *map, } reject = 0; - if (recurse_to_leaf) { + if (!collide && recurse_to_leaf) { if (item < 0) { if (crush_choose(map, map->buckets[-1-item], @@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map, x, outpos+1, 0, out2, outpos, firstn, 0, + map->chooseleaf_descend_once, NULL) <= outpos) /* didn't get leaf */ reject = 1; @@ -422,7 +424,10 @@ reject: ftotal++; flocal++; - if (collide && flocal <= map->choose_local_tries) + if (reject && descend_once) + /* let outer call try again */ + skip_rep = 1; + else if (collide && flocal <= map->choose_local_tries) /* retry locally a few times */ retry_bucket = 1; else if (map->choose_local_fallback_tries > 0 && @@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map, int i, j; int numrep; int firstn; + const int descend_once = 0; if ((__u32)ruleno >= map->max_rules) { dprintk(" bad ruleno %d\n", ruleno); @@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map, curstep->arg2, o+osize, j, firstn, - recurse_to_leaf, c+osize); + recurse_to_leaf, + descend_once, c+osize); } if (recurse_to_leaf) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index af14cb4..6e7a236 100644 --- 
a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -423,7 +423,8 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } -int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep) +static int ceph_key_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; size_t datalen = prep->datalen; @@ -458,12 +459,12 @@ err: return ret; } -int ceph_key_match(const struct key *key, const void *description) +static int ceph_key_match(const struct key *key, const void *description) { return strcmp(key->description, description) == 0; } -void ceph_key_destroy(struct key *key) { +static void ceph_key_destroy(struct key *key) { struct ceph_crypto_key *ckey = key->payload.data; ceph_crypto_key_destroy(ckey); diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 38b5dc1..00d051f 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -66,9 +66,9 @@ static int osdmap_show(struct seq_file *s, void *p) for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { struct ceph_pg_pool_info *pool = rb_entry(n, struct ceph_pg_pool_info, node); - seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", - pool->id, pool->v.pg_num, pool->pg_num_mask, - pool->v.lpg_num, pool->lpg_num_mask); + seq_printf(s, "pg_pool %llu pg_num %d / %d\n", + (unsigned long long)pool->id, pool->pg_num, + pool->pg_num_mask); } for (i = 0; i < client->osdc.osdmap->max_osd; i++) { struct ceph_entity_addr *addr = @@ -123,26 +123,16 @@ static int osdc_show(struct seq_file *s, void *pp) mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { struct ceph_osd_request *req; - struct ceph_osd_request_head *head; - struct ceph_osd_op *op; - int num_ops; - int opcode, olen; + int opcode; int i; req = rb_entry(p, struct ceph_osd_request, r_node); - seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, + seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid, req->r_osd ? 
req->r_osd->o_osd : -1, - le32_to_cpu(req->r_pgid.pool), - le16_to_cpu(req->r_pgid.ps)); + req->r_pgid.pool, req->r_pgid.seed); - head = req->r_request->front.iov_base; - op = (void *)(head + 1); - - num_ops = le16_to_cpu(head->num_ops); - olen = le32_to_cpu(head->object_len); - seq_printf(s, "%.*s", olen, - (const char *)(head->ops + num_ops)); + seq_printf(s, "%.*s", req->r_oid_len, req->r_oid); if (req->r_reassert_version.epoch) seq_printf(s, "\t%u'%llu", @@ -151,10 +141,9 @@ static int osdc_show(struct seq_file *s, void *pp) else seq_printf(s, "\t"); - for (i = 0; i < num_ops; i++) { - opcode = le16_to_cpu(op->op); + for (i = 0; i < req->r_num_ops; i++) { + opcode = le16_to_cpu(req->r_request_ops[i].op); seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); - op++; } seq_printf(s, "\n"); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 4d111fd..2c0669f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -9,8 +9,9 @@ #include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> +#ifdef CONFIG_BLOCK #include <linux/bio.h> -#include <linux/blkdev.h> +#endif /* CONFIG_BLOCK */ #include <linux/dns_resolver.h> #include <net/tcp.h> @@ -97,6 +98,57 @@ #define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */ #define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */ +static bool con_flag_valid(unsigned long con_flag) +{ + switch (con_flag) { + case CON_FLAG_LOSSYTX: + case CON_FLAG_KEEPALIVE_PENDING: + case CON_FLAG_WRITE_PENDING: + case CON_FLAG_SOCK_CLOSED: + case CON_FLAG_BACKOFF: + return true; + default: + return false; + } +} + +static void con_flag_clear(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + clear_bit(con_flag, &con->flags); +} + +static void con_flag_set(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + set_bit(con_flag, &con->flags); +} + +static bool con_flag_test(struct ceph_connection *con, unsigned 
long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_bit(con_flag, &con->flags); +} + +static bool con_flag_test_and_clear(struct ceph_connection *con, + unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_and_clear_bit(con_flag, &con->flags); +} + +static bool con_flag_test_and_set(struct ceph_connection *con, + unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_and_set_bit(con_flag, &con->flags); +} + /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; @@ -114,7 +166,7 @@ static struct lock_class_key socket_class; static void queue_con(struct ceph_connection *con); static void con_work(struct work_struct *); -static void ceph_fault(struct ceph_connection *con); +static void con_fault(struct ceph_connection *con); /* * Nicely render a sockaddr as a string. An array of formatted @@ -171,7 +223,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) */ static struct workqueue_struct *ceph_msgr_wq; -void _ceph_msgr_exit(void) +static void _ceph_msgr_exit(void) { if (ceph_msgr_wq) { destroy_workqueue(ceph_msgr_wq); @@ -308,7 +360,7 @@ static void ceph_sock_write_space(struct sock *sk) * buffer. See net/ipv4/tcp_input.c:tcp_check_space() * and net/core/stream.c:sk_stream_write_space(). 
*/ - if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) { + if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) { if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { dout("%s %p queueing write work\n", __func__, con); clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); @@ -333,7 +385,7 @@ static void ceph_sock_state_change(struct sock *sk) case TCP_CLOSE_WAIT: dout("%s TCP_CLOSE_WAIT\n", __func__); con_sock_state_closing(con); - set_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + con_flag_set(con, CON_FLAG_SOCK_CLOSED); queue_con(con); break; case TCP_ESTABLISHED: @@ -474,7 +526,7 @@ static int con_close_socket(struct ceph_connection *con) * received a socket close event before we had the chance to * shut the socket down. */ - clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + con_flag_clear(con, CON_FLAG_SOCK_CLOSED); con_sock_state_closed(con); return rc; @@ -506,6 +558,7 @@ static void reset_connection(struct ceph_connection *con) { /* reset connection, out_queue, msg_ and connect_seq */ /* discard existing out_queue and msg_seq */ + dout("reset_connection %p\n", con); ceph_msg_remove_list(&con->out_queue); ceph_msg_remove_list(&con->out_sent); @@ -537,11 +590,10 @@ void ceph_con_close(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr)); con->state = CON_STATE_CLOSED; - clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */ - clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); - clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); - clear_bit(CON_FLAG_BACKOFF, &con->flags); + con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */ + con_flag_clear(con, CON_FLAG_KEEPALIVE_PENDING); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); + con_flag_clear(con, CON_FLAG_BACKOFF); reset_connection(con); con->peer_global_seq = 0; @@ -561,7 +613,7 @@ void ceph_con_open(struct ceph_connection *con, mutex_lock(&con->mutex); dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); - 
BUG_ON(con->state != CON_STATE_CLOSED); + WARN_ON(con->state != CON_STATE_CLOSED); con->state = CON_STATE_PREOPEN; con->peer_name.type = (__u8) entity_type; @@ -797,7 +849,7 @@ static void prepare_write_message(struct ceph_connection *con) /* no, queue up footer too and be done */ prepare_write_message_footer(con); - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -818,7 +870,7 @@ static void prepare_write_ack(struct ceph_connection *con) &con->out_temp_ack); con->out_more = 1; /* more will follow.. eventually.. */ - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -829,7 +881,7 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -872,7 +924,7 @@ static void prepare_write_banner(struct ceph_connection *con) &con->msgr->my_enc_addr); con->out_more = 0; - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } static int prepare_write_connect(struct ceph_connection *con) @@ -922,7 +974,7 @@ static int prepare_write_connect(struct ceph_connection *con) auth->authorizer_buf); con->out_more = 0; - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); return 0; } @@ -1506,13 +1558,6 @@ static int process_banner(struct ceph_connection *con) return 0; } -static void fail_protocol(struct ceph_connection *con) -{ - reset_connection(con); - BUG_ON(con->state != CON_STATE_NEGOTIATING); - con->state = CON_STATE_CLOSED; -} - static int process_connect(struct ceph_connection *con) { u64 sup_feat = con->msgr->supported_features; @@ -1530,7 +1575,7 @@ static int process_connect(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), sup_feat, server_feat, 
server_feat & ~sup_feat); con->error_msg = "missing required protocol features"; - fail_protocol(con); + reset_connection(con); return -1; case CEPH_MSGR_TAG_BADPROTOVER: @@ -1541,7 +1586,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->out_connect.protocol_version), le32_to_cpu(con->in_reply.protocol_version)); con->error_msg = "protocol version mismatch"; - fail_protocol(con); + reset_connection(con); return -1; case CEPH_MSGR_TAG_BADAUTHORIZER: @@ -1631,11 +1676,11 @@ static int process_connect(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), req_feat, server_feat, req_feat & ~server_feat); con->error_msg = "missing required protocol features"; - fail_protocol(con); + reset_connection(con); return -1; } - BUG_ON(con->state != CON_STATE_NEGOTIATING); + WARN_ON(con->state != CON_STATE_NEGOTIATING); con->state = CON_STATE_OPEN; con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); @@ -1649,7 +1694,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.connect_seq)); if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) - set_bit(CON_FLAG_LOSSYTX, &con->flags); + con_flag_set(con, CON_FLAG_LOSSYTX); con->delay = 0; /* reset backoff memory */ @@ -2086,15 +2131,14 @@ do_next: prepare_write_ack(con); goto more; } - if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING, - &con->flags)) { + if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { prepare_write_keepalive(con); goto more; } } /* Nothing to do! 
*/ - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); dout("try_write nothing else to write.\n"); ret = 0; out: @@ -2132,7 +2176,6 @@ more: if (ret < 0) goto out; - BUG_ON(con->state != CON_STATE_CONNECTING); con->state = CON_STATE_NEGOTIATING; /* @@ -2160,7 +2203,7 @@ more: goto more; } - BUG_ON(con->state != CON_STATE_OPEN); + WARN_ON(con->state != CON_STATE_OPEN); if (con->in_base_pos < 0) { /* @@ -2275,7 +2318,7 @@ static void queue_con(struct ceph_connection *con) static bool con_sock_closed(struct ceph_connection *con) { - if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) + if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED)) return false; #define CASE(x) \ @@ -2302,6 +2345,41 @@ static bool con_sock_closed(struct ceph_connection *con) return true; } +static bool con_backoff(struct ceph_connection *con) +{ + int ret; + + if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF)) + return false; + + ret = queue_con_delay(con, round_jiffies_relative(con->delay)); + if (ret) { + dout("%s: con %p FAILED to back off %lu\n", __func__, + con, con->delay); + BUG_ON(ret == -ENOENT); + con_flag_set(con, CON_FLAG_BACKOFF); + } + + return true; +} + +/* Finish fault handling; con->mutex must *not* be held here */ + +static void con_fault_finish(struct ceph_connection *con) +{ + /* + * in case we faulted due to authentication, invalidate our + * current tickets so that we can get new ones. + */ + if (con->auth_retry && con->ops->invalidate_authorizer) { + dout("calling invalidate_authorizer()\n"); + con->ops->invalidate_authorizer(con); + } + + if (con->ops->fault) + con->ops->fault(con); +} + /* * Do some work on a connection. Drop a connection ref when we're done. 
*/ @@ -2309,89 +2387,84 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); - int ret; + bool fault; mutex_lock(&con->mutex); -restart: - if (con_sock_closed(con)) - goto fault; + while (true) { + int ret; - if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { - dout("con_work %p backing off\n", con); - ret = queue_con_delay(con, round_jiffies_relative(con->delay)); - if (ret) { - dout("con_work %p FAILED to back off %lu\n", con, - con->delay); - BUG_ON(ret == -ENOENT); - set_bit(CON_FLAG_BACKOFF, &con->flags); + if ((fault = con_sock_closed(con))) { + dout("%s: con %p SOCK_CLOSED\n", __func__, con); + break; + } + if (con_backoff(con)) { + dout("%s: con %p BACKOFF\n", __func__, con); + break; + } + if (con->state == CON_STATE_STANDBY) { + dout("%s: con %p STANDBY\n", __func__, con); + break; + } + if (con->state == CON_STATE_CLOSED) { + dout("%s: con %p CLOSED\n", __func__, con); + BUG_ON(con->sock); + break; + } + if (con->state == CON_STATE_PREOPEN) { + dout("%s: con %p PREOPEN\n", __func__, con); + BUG_ON(con->sock); } - goto done; - } - if (con->state == CON_STATE_STANDBY) { - dout("con_work %p STANDBY\n", con); - goto done; - } - if (con->state == CON_STATE_CLOSED) { - dout("con_work %p CLOSED\n", con); - BUG_ON(con->sock); - goto done; - } - if (con->state == CON_STATE_PREOPEN) { - dout("con_work OPENING\n"); - BUG_ON(con->sock); - } + ret = try_read(con); + if (ret < 0) { + if (ret == -EAGAIN) + continue; + con->error_msg = "socket error on read"; + fault = true; + break; + } - ret = try_read(con); - if (ret == -EAGAIN) - goto restart; - if (ret < 0) { - con->error_msg = "socket error on read"; - goto fault; - } + ret = try_write(con); + if (ret < 0) { + if (ret == -EAGAIN) + continue; + con->error_msg = "socket error on write"; + fault = true; + } - ret = try_write(con); - if (ret == -EAGAIN) - goto restart; - if (ret < 0) { - con->error_msg = "socket error on write"; 
- goto fault; + break; /* If we make it to here, we're done */ } - -done: + if (fault) + con_fault(con); mutex_unlock(&con->mutex); -done_unlocked: - con->ops->put(con); - return; -fault: - ceph_fault(con); /* error/fault path */ - goto done_unlocked; -} + if (fault) + con_fault_finish(con); + con->ops->put(con); +} /* * Generic error/fault handler. A retry mechanism is used with * exponential backoff */ -static void ceph_fault(struct ceph_connection *con) - __releases(con->mutex) +static void con_fault(struct ceph_connection *con) { pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); - BUG_ON(con->state != CON_STATE_CONNECTING && + WARN_ON(con->state != CON_STATE_CONNECTING && con->state != CON_STATE_NEGOTIATING && con->state != CON_STATE_OPEN); con_close_socket(con); - if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) { + if (con_flag_test(con, CON_FLAG_LOSSYTX)) { dout("fault on LOSSYTX channel, marking CLOSED\n"); con->state = CON_STATE_CLOSED; - goto out_unlock; + return; } if (con->in_msg) { @@ -2408,9 +2481,9 @@ static void ceph_fault(struct ceph_connection *con) /* If there are no messages queued or keepalive pending, place * the connection in a STANDBY state */ if (list_empty(&con->out_queue) && - !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) { + !con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)) { dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); con->state = CON_STATE_STANDBY; } else { /* retry after a delay. 
*/ @@ -2419,23 +2492,9 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - set_bit(CON_FLAG_BACKOFF, &con->flags); + con_flag_set(con, CON_FLAG_BACKOFF); queue_con(con); } - -out_unlock: - mutex_unlock(&con->mutex); - /* - * in case we faulted due to authentication, invalidate our - * current tickets so that we can get new ones. - */ - if (con->auth_retry && con->ops->invalidate_authorizer) { - dout("calling invalidate_authorizer()\n"); - con->ops->invalidate_authorizer(con); - } - - if (con->ops->fault) - con->ops->fault(con); } @@ -2476,8 +2535,8 @@ static void clear_standby(struct ceph_connection *con) dout("clear_standby %p and ++connect_seq\n", con); con->state = CON_STATE_PREOPEN; con->connect_seq++; - WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags)); - WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)); + WARN_ON(con_flag_test(con, CON_FLAG_WRITE_PENDING)); + WARN_ON(con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)); } } @@ -2518,7 +2577,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) /* if there wasn't anything waiting to send before, queue * new work */ - if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) + if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_send); @@ -2607,8 +2666,8 @@ void ceph_con_keepalive(struct ceph_connection *con) mutex_lock(&con->mutex); clear_standby(con); mutex_unlock(&con->mutex); - if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 && - test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) + if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 && + con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); @@ -2658,9 +2717,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, m->page_alignment = 0; m->pages = NULL; m->pagelist = 
NULL; +#ifdef CONFIG_BLOCK m->bio = NULL; m->bio_iter = NULL; m->bio_seg = 0; +#endif /* CONFIG_BLOCK */ m->trail = NULL; /* front */ diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 812eb3b..aef5b10 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -697,7 +697,7 @@ int ceph_monc_delete_snapid(struct ceph_mon_client *monc, u32 pool, u64 snapid) { return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, - pool, snapid, 0, 0); + pool, snapid, NULL, 0); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 780caf6..d730dd4 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -23,7 +23,7 @@ static const struct ceph_connection_operations osd_con_ops; -static void send_queued(struct ceph_osd_client *osdc); +static void __send_queued(struct ceph_osd_client *osdc); static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd); static void __register_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); @@ -32,64 +32,12 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static int op_needs_trail(int op) -{ - switch (op) { - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - case CEPH_OSD_OP_CALL: - case CEPH_OSD_OP_NOTIFY: - return 1; - default: - return 0; - } -} - static int op_has_extent(int op) { return (op == CEPH_OSD_OP_READ || op == CEPH_OSD_OP_WRITE); } -int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - u64 snapid, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) -{ - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; - u64 orig_len = *plen; - u64 objoff, objlen; /* extent in object */ - int r; - - reqhead->snapid = cpu_to_le64(snapid); - - /* object extent? 
*/ - r = ceph_calc_file_object_mapping(layout, off, plen, bno, - &objoff, &objlen); - if (r < 0) - return r; - if (*plen < orig_len) - dout(" skipping last %llu, final file extent %llu~%llu\n", - orig_len - *plen, off, *plen); - - if (op_has_extent(op->op)) { - op->extent.offset = objoff; - op->extent.length = objlen; - } - req->r_num_pages = calc_pages_for(off, *plen); - req->r_page_alignment = off & ~PAGE_MASK; - if (op->op == CEPH_OSD_OP_WRITE) - op->payload_len = *plen; - - dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", - *bno, objoff, objlen, req->r_num_pages); - return 0; -} -EXPORT_SYMBOL(ceph_calc_raw_layout); - /* * Implement client access to distributed object storage cluster. * @@ -115,20 +63,48 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); * * fill osd op in request message. */ -static int calc_layout(struct ceph_osd_client *osdc, - struct ceph_vino vino, +static int calc_layout(struct ceph_vino vino, struct ceph_file_layout *layout, u64 off, u64 *plen, struct ceph_osd_request *req, struct ceph_osd_req_op *op) { - u64 bno; + u64 orig_len = *plen; + u64 bno = 0; + u64 objoff = 0; + u64 objlen = 0; int r; - r = ceph_calc_raw_layout(osdc, layout, vino.snap, off, - plen, &bno, req, op); + /* object extent? 
*/ + r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, + &objoff, &objlen); if (r < 0) return r; + if (objlen < orig_len) { + *plen = objlen; + dout(" skipping last %llu, final file extent %llu~%llu\n", + orig_len - *plen, off, *plen); + } + + if (op_has_extent(op->op)) { + u32 osize = le32_to_cpu(layout->fl_object_size); + op->extent.offset = objoff; + op->extent.length = objlen; + if (op->extent.truncate_size <= off - objoff) { + op->extent.truncate_size = 0; + } else { + op->extent.truncate_size -= off - objoff; + if (op->extent.truncate_size > osize) + op->extent.truncate_size = osize; + } + } + req->r_num_pages = calc_pages_for(off, *plen); + req->r_page_alignment = off & ~PAGE_MASK; + if (op->op == CEPH_OSD_OP_WRITE) + op->payload_len = *plen; + + dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", + bno, objoff, objlen, req->r_num_pages); snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); @@ -148,25 +124,19 @@ void ceph_osdc_release_request(struct kref *kref) if (req->r_request) ceph_msg_put(req->r_request); if (req->r_con_filling_msg) { - dout("%s revoking pages %p from con %p\n", __func__, - req->r_pages, req->r_con_filling_msg); + dout("%s revoking msg %p from con %p\n", __func__, + req->r_reply, req->r_con_filling_msg); ceph_msg_revoke_incoming(req->r_reply); req->r_con_filling_msg->ops->put(req->r_con_filling_msg); + req->r_con_filling_msg = NULL; } if (req->r_reply) ceph_msg_put(req->r_reply); if (req->r_own_pages) ceph_release_page_vector(req->r_pages, req->r_num_pages); -#ifdef CONFIG_BLOCK - if (req->r_bio) - bio_put(req->r_bio); -#endif ceph_put_snap_context(req->r_snapc); - if (req->r_trail) { - ceph_pagelist_release(req->r_trail); - kfree(req->r_trail); - } + ceph_pagelist_release(&req->r_trail); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else @@ -174,37 +144,25 @@ void ceph_osdc_release_request(struct kref *kref) } 
EXPORT_SYMBOL(ceph_osdc_release_request); -static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) -{ - int i = 0; - - if (needs_trail) - *needs_trail = 0; - while (ops[i].op) { - if (needs_trail && op_needs_trail(ops[i].op)) - *needs_trail = 1; - i++; - } - - return i; -} - struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_ops, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio) + gfp_t gfp_flags) { struct ceph_osd_request *req; struct ceph_msg *msg; - int needs_trail; - int num_op = get_num_ops(ops, &needs_trail); - size_t msg_size = sizeof(struct ceph_osd_request_head); - - msg_size += num_op*sizeof(struct ceph_osd_op); + size_t msg_size; + + msg_size = 4 + 4 + 8 + 8 + 4+8; + msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ + msg_size += 1 + 8 + 4 + 4; /* pg_t */ + msg_size += 4 + MAX_OBJ_NAME_SIZE; + msg_size += 2 + num_ops*sizeof(struct ceph_osd_op); + msg_size += 8; /* snapid */ + msg_size += 8; /* snap_seq */ + msg_size += 8 * (snapc ? 
snapc->num_snaps : 0); /* snaps */ + msg_size += 4; if (use_mempool) { req = mempool_alloc(osdc->req_mempool, gfp_flags); @@ -228,10 +186,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_osd_item); - req->r_flags = flags; - - WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); - /* create reply message */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); @@ -244,20 +198,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } req->r_reply = msg; - /* allocate space for the trailing data */ - if (needs_trail) { - req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); - if (!req->r_trail) { - ceph_osdc_put_request(req); - return NULL; - } - ceph_pagelist_init(req->r_trail); - } + ceph_pagelist_init(&req->r_trail); /* create request message; allow space for oid */ - msg_size += MAX_OBJ_NAME_SIZE; - if (snapc) - msg_size += sizeof(u64) * snapc->num_snaps; if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else @@ -270,13 +213,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, memset(msg->front.iov_base, 0, msg->front.iov_len); req->r_request = msg; - req->r_pages = pages; -#ifdef CONFIG_BLOCK - if (bio) { - req->r_bio = bio; - bio_get(req->r_bio); - } -#endif return req; } @@ -289,6 +225,8 @@ static void osd_req_encode_op(struct ceph_osd_request *req, dst->op = cpu_to_le16(src->op); switch (src->op) { + case CEPH_OSD_OP_STAT: + break; case CEPH_OSD_OP_READ: case CEPH_OSD_OP_WRITE: dst->extent.offset = @@ -300,52 +238,20 @@ static void osd_req_encode_op(struct ceph_osd_request *req, dst->extent.truncate_seq = cpu_to_le32(src->extent.truncate_seq); break; - - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - BUG_ON(!req->r_trail); - - dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); - dst->xattr.value_len = 
cpu_to_le32(src->xattr.value_len); - dst->xattr.cmp_op = src->xattr.cmp_op; - dst->xattr.cmp_mode = src->xattr.cmp_mode; - ceph_pagelist_append(req->r_trail, src->xattr.name, - src->xattr.name_len); - ceph_pagelist_append(req->r_trail, src->xattr.val, - src->xattr.value_len); - break; case CEPH_OSD_OP_CALL: - BUG_ON(!req->r_trail); - dst->cls.class_len = src->cls.class_len; dst->cls.method_len = src->cls.method_len; dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); - ceph_pagelist_append(req->r_trail, src->cls.class_name, + ceph_pagelist_append(&req->r_trail, src->cls.class_name, src->cls.class_len); - ceph_pagelist_append(req->r_trail, src->cls.method_name, + ceph_pagelist_append(&req->r_trail, src->cls.method_name, src->cls.method_len); - ceph_pagelist_append(req->r_trail, src->cls.indata, + ceph_pagelist_append(&req->r_trail, src->cls.indata, src->cls.indata_len); break; - case CEPH_OSD_OP_ROLLBACK: - dst->snap.snapid = cpu_to_le64(src->snap.snapid); - break; case CEPH_OSD_OP_STARTSYNC: break; - case CEPH_OSD_OP_NOTIFY: - { - __le32 prot_ver = cpu_to_le32(src->watch.prot_ver); - __le32 timeout = cpu_to_le32(src->watch.timeout); - - BUG_ON(!req->r_trail); - - ceph_pagelist_append(req->r_trail, - &prot_ver, sizeof(prot_ver)); - ceph_pagelist_append(req->r_trail, - &timeout, sizeof(timeout)); - } case CEPH_OSD_OP_NOTIFY_ACK: case CEPH_OSD_OP_WATCH: dst->watch.cookie = cpu_to_le64(src->watch.cookie); @@ -356,6 +262,64 @@ static void osd_req_encode_op(struct ceph_osd_request *req, pr_err("unrecognized osd opcode %d\n", dst->op); WARN_ON(1); break; + case CEPH_OSD_OP_MAPEXT: + case CEPH_OSD_OP_MASKTRUNC: + case CEPH_OSD_OP_SPARSE_READ: + case CEPH_OSD_OP_NOTIFY: + case CEPH_OSD_OP_ASSERT_VER: + case CEPH_OSD_OP_WRITEFULL: + case CEPH_OSD_OP_TRUNCATE: + case CEPH_OSD_OP_ZERO: + case CEPH_OSD_OP_DELETE: + case CEPH_OSD_OP_APPEND: + case CEPH_OSD_OP_SETTRUNC: + case CEPH_OSD_OP_TRIMTRUNC: + case CEPH_OSD_OP_TMAPUP: + case CEPH_OSD_OP_TMAPPUT: + case 
CEPH_OSD_OP_TMAPGET: + case CEPH_OSD_OP_CREATE: + case CEPH_OSD_OP_ROLLBACK: + case CEPH_OSD_OP_OMAPGETKEYS: + case CEPH_OSD_OP_OMAPGETVALS: + case CEPH_OSD_OP_OMAPGETHEADER: + case CEPH_OSD_OP_OMAPGETVALSBYKEYS: + case CEPH_OSD_OP_MODE_RD: + case CEPH_OSD_OP_OMAPSETVALS: + case CEPH_OSD_OP_OMAPSETHEADER: + case CEPH_OSD_OP_OMAPCLEAR: + case CEPH_OSD_OP_OMAPRMKEYS: + case CEPH_OSD_OP_OMAP_CMP: + case CEPH_OSD_OP_CLONERANGE: + case CEPH_OSD_OP_ASSERT_SRC_VERSION: + case CEPH_OSD_OP_SRC_CMPXATTR: + case CEPH_OSD_OP_GETXATTR: + case CEPH_OSD_OP_GETXATTRS: + case CEPH_OSD_OP_CMPXATTR: + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_SETXATTRS: + case CEPH_OSD_OP_RESETXATTRS: + case CEPH_OSD_OP_RMXATTR: + case CEPH_OSD_OP_PULL: + case CEPH_OSD_OP_PUSH: + case CEPH_OSD_OP_BALANCEREADS: + case CEPH_OSD_OP_UNBALANCEREADS: + case CEPH_OSD_OP_SCRUB: + case CEPH_OSD_OP_SCRUB_RESERVE: + case CEPH_OSD_OP_SCRUB_UNRESERVE: + case CEPH_OSD_OP_SCRUB_STOP: + case CEPH_OSD_OP_SCRUB_MAP: + case CEPH_OSD_OP_WRLOCK: + case CEPH_OSD_OP_WRUNLOCK: + case CEPH_OSD_OP_RDLOCK: + case CEPH_OSD_OP_RDUNLOCK: + case CEPH_OSD_OP_UPLOCK: + case CEPH_OSD_OP_DNLOCK: + case CEPH_OSD_OP_PGLS: + case CEPH_OSD_OP_PGLS_FILTER: + pr_err("unsupported osd opcode %s\n", + ceph_osd_op_name(dst->op)); + WARN_ON(1); + break; } dst->payload_len = cpu_to_le32(src->payload_len); } @@ -365,75 +329,95 @@ static void osd_req_encode_op(struct ceph_osd_request *req, * */ void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, unsigned int num_ops, struct ceph_osd_req_op *src_ops, - struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len) + struct ceph_snap_context *snapc, u64 snap_id, + struct timespec *mtime) { struct ceph_msg *msg = req->r_request; - struct ceph_osd_request_head *head; struct ceph_osd_req_op *src_op; - struct ceph_osd_op *op; void *p; - int num_op = get_num_ops(src_ops, NULL); - size_t msg_size = sizeof(*head) + 
num_op*sizeof(*op); + size_t msg_size; int flags = req->r_flags; - u64 data_len = 0; + u64 data_len; int i; - head = msg->front.iov_base; - op = (void *)(head + 1); - p = (void *)(op + num_op); - + req->r_num_ops = num_ops; + req->r_snapid = snap_id; req->r_snapc = ceph_get_snap_context(snapc); - head->client_inc = cpu_to_le32(1); /* always, for now. */ - head->flags = cpu_to_le32(flags); - if (flags & CEPH_OSD_FLAG_WRITE) - ceph_encode_timespec(&head->mtime, mtime); - head->num_ops = cpu_to_le16(num_op); - - - /* fill in oid */ - head->object_len = cpu_to_le32(oid_len); - memcpy(p, oid, oid_len); - p += oid_len; + /* encode request */ + msg->hdr.version = cpu_to_le16(4); + p = msg->front.iov_base; + ceph_encode_32(&p, 1); /* client_inc is always 1 */ + req->r_request_osdmap_epoch = p; + p += 4; + req->r_request_flags = p; + p += 4; + if (req->r_flags & CEPH_OSD_FLAG_WRITE) + ceph_encode_timespec(p, mtime); + p += sizeof(struct ceph_timespec); + req->r_request_reassert_version = p; + p += sizeof(struct ceph_eversion); /* will get filled in */ + + /* oloc */ + ceph_encode_8(&p, 4); + ceph_encode_8(&p, 4); + ceph_encode_32(&p, 8 + 4 + 4); + req->r_request_pool = p; + p += 8; + ceph_encode_32(&p, -1); /* preferred */ + ceph_encode_32(&p, 0); /* key len */ + + ceph_encode_8(&p, 1); + req->r_request_pgid = p; + p += 8 + 4; + ceph_encode_32(&p, -1); /* preferred */ + + /* oid */ + ceph_encode_32(&p, req->r_oid_len); + memcpy(p, req->r_oid, req->r_oid_len); + dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len); + p += req->r_oid_len; + + /* ops */ + ceph_encode_16(&p, num_ops); src_op = src_ops; - while (src_op->op) { - osd_req_encode_op(req, op, src_op); - src_op++; - op++; + req->r_request_ops = p; + for (i = 0; i < num_ops; i++, src_op++) { + osd_req_encode_op(req, p, src_op); + p += sizeof(struct ceph_osd_op); } - if (req->r_trail) - data_len += req->r_trail->length; - - if (snapc) { - head->snap_seq = cpu_to_le64(snapc->seq); - head->num_snaps = 
cpu_to_le32(snapc->num_snaps); + /* snaps */ + ceph_encode_64(&p, req->r_snapid); + ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); + ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0); + if (req->r_snapc) { for (i = 0; i < snapc->num_snaps; i++) { - put_unaligned_le64(snapc->snaps[i], p); - p += sizeof(u64); + ceph_encode_64(&p, req->r_snapc->snaps[i]); } } + req->r_request_attempts = p; + p += 4; + + data_len = req->r_trail.length; if (flags & CEPH_OSD_FLAG_WRITE) { req->r_request->hdr.data_off = cpu_to_le16(off); - req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); - } else if (data_len) { - req->r_request->hdr.data_off = 0; - req->r_request->hdr.data_len = cpu_to_le32(data_len); + data_len += len; } - + req->r_request->hdr.data_len = cpu_to_le32(data_len); req->r_request->page_alignment = req->r_page_alignment; BUG_ON(p > msg->front.iov_base + msg->front.iov_len); msg_size = p - msg->front.iov_base; msg->front.iov_len = msg_size; msg->hdr.front_len = cpu_to_le32(msg_size); + + dout("build_request msg_size was %d num_ops %d\n", (int)msg_size, + num_ops); return; } EXPORT_SYMBOL(ceph_osdc_build_request); @@ -459,34 +443,33 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, + bool use_mempool, int page_align) { - struct ceph_osd_req_op ops[3]; + struct ceph_osd_req_op ops[2]; struct ceph_osd_request *req; + unsigned int num_op = 1; int r; + memset(&ops, 0, sizeof ops); + ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; ops[0].extent.truncate_size = truncate_size; - ops[0].payload_len = 0; if (do_sync) { ops[1].op = CEPH_OSD_OP_STARTSYNC; - ops[1].payload_len = 0; - ops[2].op = 0; - } else - ops[1].op = 0; - - req = ceph_osdc_alloc_request(osdc, flags, - snapc, ops, - use_mempool, - GFP_NOFS, NULL, NULL); + num_op++; + } + + req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, + 
GFP_NOFS); if (!req) return ERR_PTR(-ENOMEM); + req->r_flags = flags; /* calculate max write size */ - r = calc_layout(osdc, vino, layout, off, plen, req, ops); + r = calc_layout(vino, layout, off, plen, req, ops); if (r < 0) return ERR_PTR(r); req->r_file_layout = *layout; /* keep a copy */ @@ -496,10 +479,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; - ceph_osdc_build_request(req, off, plen, ops, - snapc, - mtime, - req->r_oid, req->r_oid_len); + ceph_osdc_build_request(req, off, *plen, num_op, ops, + snapc, vino.snap, mtime); return req; } @@ -623,8 +604,8 @@ static void osd_reset(struct ceph_connection *con) down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); __kick_osd_requests(osdc, osd); + __send_queued(osdc); mutex_unlock(&osdc->request_mutex); - send_queued(osdc); up_read(&osdc->map_sem); } @@ -739,31 +720,35 @@ static void remove_old_osds(struct ceph_osd_client *osdc) */ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { - struct ceph_osd_request *req; - int ret = 0; + struct ceph_entity_addr *peer_addr; dout("__reset_osd %p osd%d\n", osd, osd->o_osd); if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); - ret = -ENODEV; - } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], - &osd->o_con.peer_addr, - sizeof(osd->o_con.peer_addr)) == 0 && - !ceph_con_opened(&osd->o_con)) { + + return -ENODEV; + } + + peer_addr = &osdc->osdmap->osd_addr[osd->o_osd]; + if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) && + !ceph_con_opened(&osd->o_con)) { + struct ceph_osd_request *req; + dout(" osd addr hasn't changed and connection never opened," " letting msgr retry"); /* touch each r_stamp for handle_timeout()'s benfit */ list_for_each_entry(req, &osd->o_requests, r_osd_item) req->r_stamp = jiffies; - ret = -EAGAIN; - } else { - 
ceph_con_close(&osd->o_con); - ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, - &osdc->osdmap->osd_addr[osd->o_osd]); - osd->o_incarnation++; + + return -EAGAIN; } - return ret; + + ceph_con_close(&osd->o_con); + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr); + osd->o_incarnation++; + + return 0; } static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) @@ -961,20 +946,18 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); static int __map_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, int force_resend) { - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; int acting[CEPH_PG_MAX_SIZE]; int o = -1, num = 0; int err; dout("map_request %p tid %lld\n", req, req->r_tid); - err = ceph_calc_object_layout(&reqhead->layout, req->r_oid, + err = ceph_calc_object_layout(&pgid, req->r_oid, &req->r_file_layout, osdc->osdmap); if (err) { list_move(&req->r_req_lru_item, &osdc->req_notarget); return err; } - pgid = reqhead->layout.ol_pgid; req->r_pgid = pgid; err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); @@ -991,8 +974,8 @@ static int __map_request(struct ceph_osd_client *osdc, (req->r_osd == NULL && o == -1)) return 0; /* no change */ - dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n", - req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, + dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n", + req->r_tid, pgid.pool, pgid.seed, o, req->r_osd ? 
req->r_osd->o_osd : -1); /* record full pg acting set */ @@ -1041,15 +1024,22 @@ out: static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { - struct ceph_osd_request_head *reqhead; - - dout("send_request %p tid %llu to osd%d flags %d\n", - req, req->r_tid, req->r_osd->o_osd, req->r_flags); + void *p; - reqhead = req->r_request->front.iov_base; - reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch); - reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ - reqhead->reassert_version = req->r_reassert_version; + dout("send_request %p tid %llu to osd%d flags %d pg %lld.%x\n", + req, req->r_tid, req->r_osd->o_osd, req->r_flags, + (unsigned long long)req->r_pgid.pool, req->r_pgid.seed); + + /* fill in message content that changes each time we send it */ + put_unaligned_le32(osdc->osdmap->epoch, req->r_request_osdmap_epoch); + put_unaligned_le32(req->r_flags, req->r_request_flags); + put_unaligned_le64(req->r_pgid.pool, req->r_request_pool); + p = req->r_request_pgid; + ceph_encode_64(&p, req->r_pgid.pool); + ceph_encode_32(&p, req->r_pgid.seed); + put_unaligned_le64(1, req->r_request_attempts); /* FIXME */ + memcpy(req->r_request_reassert_version, &req->r_reassert_version, + sizeof(req->r_reassert_version)); req->r_stamp = jiffies; list_move_tail(&req->r_req_lru_item, &osdc->req_lru); @@ -1062,16 +1052,13 @@ static void __send_request(struct ceph_osd_client *osdc, /* * Send any requests in the queue (req_unsent). 
*/ -static void send_queued(struct ceph_osd_client *osdc) +static void __send_queued(struct ceph_osd_client *osdc) { struct ceph_osd_request *req, *tmp; - dout("send_queued\n"); - mutex_lock(&osdc->request_mutex); - list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) { + dout("__send_queued\n"); + list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) __send_request(osdc, req); - } - mutex_unlock(&osdc->request_mutex); } /* @@ -1123,8 +1110,8 @@ static void handle_timeout(struct work_struct *work) } __schedule_osd_timeout(osdc); + __send_queued(osdc); mutex_unlock(&osdc->request_mutex); - send_queued(osdc); up_read(&osdc->map_sem); } @@ -1152,6 +1139,26 @@ static void complete_request(struct ceph_osd_request *req) complete_all(&req->r_safe_completion); /* fsync waiter */ } +static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid) +{ + __u8 v; + + ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad); + v = ceph_decode_8(p); + if (v > 1) { + pr_warning("do not understand pg encoding %d > 1", v); + return -EINVAL; + } + pgid->pool = ceph_decode_64(p); + pgid->seed = ceph_decode_32(p); + *p += 4; + return 0; + +bad: + pr_warning("incomplete pg encoding"); + return -EINVAL; +} + /* * handle osd op reply. either call the callback if it is specified, * or do the completion to wake up the waiting thread. 
@@ -1159,22 +1166,42 @@ static void complete_request(struct ceph_osd_request *req) static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, struct ceph_connection *con) { - struct ceph_osd_reply_head *rhead = msg->front.iov_base; + void *p, *end; struct ceph_osd_request *req; u64 tid; - int numops, object_len, flags; + int object_len; + int numops, payload_len, flags; s32 result; + s32 retry_attempt; + struct ceph_pg pg; + int err; + u32 reassert_epoch; + u64 reassert_version; + u32 osdmap_epoch; + int i; tid = le64_to_cpu(msg->hdr.tid); - if (msg->front.iov_len < sizeof(*rhead)) - goto bad; - numops = le32_to_cpu(rhead->num_ops); - object_len = le32_to_cpu(rhead->object_len); - result = le32_to_cpu(rhead->result); - if (msg->front.iov_len != sizeof(*rhead) + object_len + - numops * sizeof(struct ceph_osd_op)) + dout("handle_reply %p tid %llu\n", msg, tid); + + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + ceph_decode_need(&p, end, 4, bad); + object_len = ceph_decode_32(&p); + ceph_decode_need(&p, end, object_len, bad); + p += object_len; + + err = __decode_pgid(&p, end, &pg); + if (err) goto bad; - dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); + + ceph_decode_need(&p, end, 8 + 4 + 4 + 8 + 4, bad); + flags = ceph_decode_64(&p); + result = ceph_decode_32(&p); + reassert_epoch = ceph_decode_32(&p); + reassert_version = ceph_decode_64(&p); + osdmap_epoch = ceph_decode_32(&p); + /* lookup */ mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); @@ -1184,7 +1211,38 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, return; } ceph_osdc_get_request(req); - flags = le32_to_cpu(rhead->flags); + + dout("handle_reply %p tid %llu req %p result %d\n", msg, tid, + req, result); + + ceph_decode_need(&p, end, 4, bad); + numops = ceph_decode_32(&p); + if (numops > CEPH_OSD_MAX_OP) + goto bad_put; + if (numops != req->r_num_ops) + goto bad_put; + payload_len = 0; + 
ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad); + for (i = 0; i < numops; i++) { + struct ceph_osd_op *op = p; + int len; + + len = le32_to_cpu(op->payload_len); + req->r_reply_op_len[i] = len; + dout(" op %d has %d bytes\n", i, len); + payload_len += len; + p += sizeof(*op); + } + if (payload_len != le32_to_cpu(msg->hdr.data_len)) { + pr_warning("sum of op payload lens %d != data_len %d", + payload_len, le32_to_cpu(msg->hdr.data_len)); + goto bad_put; + } + + ceph_decode_need(&p, end, 4 + numops * 4, bad); + retry_attempt = ceph_decode_32(&p); + for (i = 0; i < numops; i++) + req->r_reply_op_result[i] = ceph_decode_32(&p); /* * if this connection filled our message, drop our reference now, to @@ -1199,7 +1257,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, if (!req->r_got_reply) { unsigned int bytes; - req->r_result = le32_to_cpu(rhead->result); + req->r_result = result; bytes = le32_to_cpu(msg->hdr.data_len); dout("handle_reply result %d bytes %d\n", req->r_result, bytes); @@ -1207,7 +1265,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, req->r_result = bytes; /* in case this is a write and we need to replay, */ - req->r_reassert_version = rhead->reassert_version; + req->r_reassert_version.epoch = cpu_to_le32(reassert_epoch); + req->r_reassert_version.version = cpu_to_le64(reassert_version); req->r_got_reply = 1; } else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) { @@ -1242,10 +1301,11 @@ done: ceph_osdc_put_request(req); return; +bad_put: + ceph_osdc_put_request(req); bad: - pr_err("corrupt osd_op_reply got %d %d expected %d\n", - (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len), - (int)sizeof(*rhead)); + pr_err("corrupt osd_op_reply got %d %d\n", + (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); ceph_msg_dump(msg); } @@ -1270,7 +1330,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) * Requeue requests whose mapping to an OSD has changed. 
If requests map to * no osd, request a new map. * - * Caller should hold map_sem for read and request_mutex. + * Caller should hold map_sem for read. */ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) { @@ -1284,6 +1344,24 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) for (p = rb_first(&osdc->requests); p; ) { req = rb_entry(p, struct ceph_osd_request, r_node); p = rb_next(p); + + /* + * For linger requests that have not yet been + * registered, move them to the linger list; they'll + * be sent to the osd in the loop below. Unregister + * the request before re-registering it as a linger + * request to ensure the __map_request() below + * will decide it needs to be sent. + */ + if (req->r_linger && list_empty(&req->r_linger_item)) { + dout("%p tid %llu restart on osd%d\n", + req, req->r_tid, + req->r_osd ? req->r_osd->o_osd : -1); + __unregister_request(osdc, req); + __register_linger_request(osdc, req); + continue; + } + err = __map_request(osdc, req, force_resend); if (err < 0) continue; /* error */ @@ -1298,17 +1376,6 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) req->r_flags |= CEPH_OSD_FLAG_RETRY; } } - if (req->r_linger && list_empty(&req->r_linger_item)) { - /* - * register as a linger so that we will - * re-submit below and get a new tid - */ - dout("%p tid %llu restart on osd%d\n", - req, req->r_tid, - req->r_osd ? 
req->r_osd->o_osd : -1); - __register_linger_request(osdc, req); - __unregister_request(osdc, req); - } } list_for_each_entry_safe(req, nreq, &osdc->req_linger, @@ -1316,6 +1383,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); err = __map_request(osdc, req, force_resend); + dout("__map_request returned %d\n", err); if (err == 0) continue; /* no change and no osd was specified */ if (err < 0) @@ -1337,6 +1405,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("%d requests for down osds, need new map\n", needmap); ceph_monc_request_next_osdmap(&osdc->client->monc); } + reset_changed_osds(osdc); } @@ -1393,7 +1462,6 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) osdc->osdmap = newmap; } kick_requests(osdc, 0); - reset_changed_osds(osdc); } else { dout("ignoring incremental map %u len %d\n", epoch, maplen); @@ -1454,7 +1522,9 @@ done: if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) ceph_monc_request_next_osdmap(&osdc->client->monc); - send_queued(osdc); + mutex_lock(&osdc->request_mutex); + __send_queued(osdc); + mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); wake_up_all(&osdc->client->auth_wq); return; @@ -1548,8 +1618,7 @@ static void __remove_event(struct ceph_osd_event *event) int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent) + void *data, struct ceph_osd_event **pevent) { struct ceph_osd_event *event; @@ -1559,14 +1628,13 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, dout("create_event %p\n", event); event->cb = event_cb; - event->one_shot = one_shot; + event->one_shot = 0; event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ - 
init_completion(&event->completion); spin_lock(&osdc->event_lock); event->cookie = ++osdc->event_count; @@ -1602,7 +1670,6 @@ static void do_event_work(struct work_struct *work) dout("do_event_work completing %p\n", event); event->cb(ver, notify_id, opcode, event->data); - complete(&event->completion); dout("do_event_work completed %p\n", event); ceph_osdc_put_event(event); kfree(event_work); @@ -1612,7 +1679,8 @@ static void do_event_work(struct work_struct *work) /* * Process osd watch notifications */ -void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) +static void handle_watch_notify(struct ceph_osd_client *osdc, + struct ceph_msg *msg) { void *p, *end; u8 proto_ver; @@ -1633,9 +1701,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) spin_lock(&osdc->event_lock); event = __find_event(osdc, cookie); if (event) { + BUG_ON(event->one_shot); get_event(event); - if (event->one_shot) - __remove_event(event); } spin_unlock(&osdc->event_lock); dout("handle_watch_notify cookie %lld ver %lld event %p\n", @@ -1660,7 +1727,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) return; done_err: - complete(&event->completion); ceph_osdc_put_event(event); return; @@ -1669,21 +1735,6 @@ bad: return; } -int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout) -{ - int err; - - dout("wait_event %p\n", event); - err = wait_for_completion_interruptible_timeout(&event->completion, - timeout * HZ); - ceph_osdc_put_event(event); - if (err > 0) - err = 0; - dout("wait_event %p returns %d\n", event, err); - return err; -} -EXPORT_SYMBOL(ceph_osdc_wait_event); - /* * Register request, send initial attempt. 
*/ @@ -1698,7 +1749,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, #ifdef CONFIG_BLOCK req->r_request->bio = req->r_bio; #endif - req->r_request->trail = req->r_trail; + req->r_request->trail = &req->r_trail; register_request(osdc, req); @@ -1857,7 +1908,6 @@ out_mempool: out: return err; } -EXPORT_SYMBOL(ceph_osdc_init); void ceph_osdc_stop(struct ceph_osd_client *osdc) { @@ -1874,7 +1924,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); } -EXPORT_SYMBOL(ceph_osdc_stop); /* * Read some contiguous pages. If we cross a stripe boundary, shorten @@ -1894,7 +1943,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, req = ceph_osdc_new_request(osdc, layout, vino, off, plen, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, - false, 1, page_align); + false, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1923,8 +1972,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int num_pages, - int flags, int do_sync, bool nofail) + struct page **pages, int num_pages) { struct ceph_osd_request *req; int rc = 0; @@ -1933,11 +1981,10 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, BUG_ON(vino.snap != CEPH_NOSNAP); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, CEPH_OSD_OP_WRITE, - flags | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE, - snapc, do_sync, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + snapc, 0, truncate_seq, truncate_size, mtime, - nofail, 1, page_align); + true, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1946,7 +1993,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, dout("writepages %llu~%llu (%d pages)\n", off, len, req->r_num_pages); - rc = ceph_osdc_start_request(osdc, req, nofail); + rc = 
ceph_osdc_start_request(osdc, req, true); if (!rc) rc = ceph_osdc_wait_request(osdc, req); @@ -2039,7 +2086,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (data_len > 0) { int want = calc_pages_for(req->r_page_alignment, data_len); - if (unlikely(req->r_num_pages < want)) { + if (req->r_pages && unlikely(req->r_num_pages < want)) { pr_warning("tid %lld reply has %d bytes %d pages, we" " had only %d pages ready\n", tid, data_len, want, req->r_num_pages); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index de73214..4543b9a 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -13,26 +13,18 @@ char *ceph_osdmap_state_str(char *str, int len, int state) { - int flag = 0; - if (!len) - goto done; - - *str = '\0'; - if (state) { - if (state & CEPH_OSD_EXISTS) { - snprintf(str, len, "exists"); - flag = 1; - } - if (state & CEPH_OSD_UP) { - snprintf(str, len, "%s%s%s", str, (flag ? ", " : ""), - "up"); - flag = 1; - } - } else { + return str; + + if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP)) + snprintf(str, len, "exists, up"); + else if (state & CEPH_OSD_EXISTS) + snprintf(str, len, "exists"); + else if (state & CEPH_OSD_UP) + snprintf(str, len, "up"); + else snprintf(str, len, "doesn't exist"); - } -done: + return str; } @@ -53,13 +45,8 @@ static int calc_bits_of(unsigned int t) */ static void calc_pg_masks(struct ceph_pg_pool_info *pi) { - pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; - pi->pgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1; - pi->lpg_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1; - pi->lpgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1; + pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1; + pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1; } /* @@ -170,6 +157,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) c->choose_local_tries = 2; c->choose_local_fallback_tries = 5; c->choose_total_tries 
= 19; + c->chooseleaf_descend_once = 0; ceph_decode_need(p, end, 4*sizeof(u32), bad); magic = ceph_decode_32(p); @@ -336,6 +324,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable choose_total_tries = %d", c->choose_total_tries); + ceph_decode_need(p, end, sizeof(u32), done); + c->chooseleaf_descend_once = ceph_decode_32(p); + dout("crush decode tunable chooseleaf_descend_once = %d", + c->chooseleaf_descend_once); + done: dout("crush_decode success\n"); return c; @@ -354,12 +347,13 @@ bad: */ static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) { - u64 a = *(u64 *)&l; - u64 b = *(u64 *)&r; - - if (a < b) + if (l.pool < r.pool) + return -1; + if (l.pool > r.pool) + return 1; + if (l.seed < r.seed) return -1; - if (a > b) + if (l.seed > r.seed) return 1; return 0; } @@ -405,8 +399,8 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, } else if (c > 0) { n = n->rb_right; } else { - dout("__lookup_pg_mapping %llx got %p\n", - *(u64 *)&pgid, pg); + dout("__lookup_pg_mapping %lld.%x got %p\n", + pgid.pool, pgid.seed, pg); return pg; } } @@ -418,12 +412,13 @@ static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); if (pg) { - dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); + dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed, + pg); rb_erase(&pg->node, root); kfree(pg); return 0; } - dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); + dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed); return -ENOENT; } @@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) return 0; } -static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) +static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id) { struct ceph_pg_pool_info *pi; struct rb_node *n = root->rb_node; @@ -508,24 +503,57 @@ static void 
__remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) { - unsigned int n, m; + u8 ev, cv; + unsigned len, num; + void *pool_end; + + ceph_decode_need(p, end, 2 + 4, bad); + ev = ceph_decode_8(p); /* encoding version */ + cv = ceph_decode_8(p); /* compat version */ + if (ev < 5) { + pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + if (cv > 7) { + pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + len = ceph_decode_32(p); + ceph_decode_need(p, end, len, bad); + pool_end = *p + len; - ceph_decode_copy(p, &pi->v, sizeof(pi->v)); - calc_pg_masks(pi); + pi->type = ceph_decode_8(p); + pi->size = ceph_decode_8(p); + pi->crush_ruleset = ceph_decode_8(p); + pi->object_hash = ceph_decode_8(p); + + pi->pg_num = ceph_decode_32(p); + pi->pgp_num = ceph_decode_32(p); - /* num_snaps * snap_info_t */ - n = le32_to_cpu(pi->v.num_snaps); - while (n--) { - ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + - sizeof(struct ceph_timespec), bad); - *p += sizeof(u64) + /* key */ - 1 + sizeof(u64) + /* u8, snapid */ - sizeof(struct ceph_timespec); - m = ceph_decode_32(p); /* snap name */ - *p += m; + *p += 4 + 4; /* skip lpg* */ + *p += 4; /* skip last_change */ + *p += 8 + 4; /* skip snap_seq, snap_epoch */ + + /* skip snaps */ + num = ceph_decode_32(p); + while (num--) { + *p += 8; /* snapid key */ + *p += 1 + 1; /* versions */ + len = ceph_decode_32(p); + *p += len; } - *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; + /* skip removed snaps */ + num = ceph_decode_32(p); + *p += num * (8 + 8); + + *p += 8; /* skip auid */ + pi->flags = ceph_decode_64(p); + + /* ignore the rest */ + + *p = pool_end; + calc_pg_masks(pi); return 0; bad: @@ -535,14 +563,15 @@ bad: static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) { struct ceph_pg_pool_info *pi; - u32 num, len, pool; + u32 num, 
len; + u64 pool; ceph_decode_32_safe(p, end, num, bad); dout(" %d pool names\n", num); while (num--) { - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); ceph_decode_32_safe(p, end, len, bad); - dout(" pool %d len %d\n", pool, len); + dout(" pool %llu len %d\n", pool, len); ceph_decode_need(p, end, len, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) { @@ -625,6 +654,24 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) return 0; } +static int __decode_pgid(void **p, void *end, struct ceph_pg *pg) +{ + u8 v; + + ceph_decode_need(p, end, 1+8+4+4, bad); + v = ceph_decode_8(p); + if (v != 1) + goto bad; + pg->pool = ceph_decode_64(p); + pg->seed = ceph_decode_32(p); + *p += 4; /* skip preferred */ + return 0; + +bad: + dout("error decoding pgid\n"); + return -EINVAL; +} + /* * decode a full map. */ @@ -633,7 +680,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) struct ceph_osdmap *map; u16 version; u32 len, max, i; - u8 ev; int err = -EINVAL; void *start = *p; struct ceph_pg_pool_info *pi; @@ -646,9 +692,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) map->pg_temp = RB_ROOT; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_VERSION) { - pr_warning("got unknown v %d > %d of osdmap\n", version, - CEPH_OSDMAP_VERSION); + if (version > 6) { + pr_warning("got unknown v %d > 6 of osdmap\n", version); + goto bad; + } + if (version < 6) { + pr_warning("got old v %d < 6 of osdmap\n", version); goto bad; } @@ -660,20 +709,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, max, bad); while (max--) { - ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); + ceph_decode_need(p, end, 8 + 2, bad); err = -ENOMEM; pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) goto bad; - pi->id = ceph_decode_32(p); - err = -EINVAL; - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of 
ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - kfree(pi); - goto bad; - } + pi->id = ceph_decode_64(p); err = __decode_pool(p, end, pi); if (err < 0) { kfree(pi); @@ -682,12 +723,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) __insert_pg_pool(&map->pg_pools, pi); } - if (version >= 5) { - err = __decode_pool_names(p, end, map); - if (err < 0) { - dout("fail to decode pool names"); - goto bad; - } + err = __decode_pool_names(p, end, map); + if (err < 0) { + dout("fail to decode pool names"); + goto bad; } ceph_decode_32_safe(p, end, map->pool_max, bad); @@ -726,8 +765,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) struct ceph_pg pgid; struct ceph_pg_mapping *pg; - ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); + err = __decode_pgid(p, end, &pgid); + if (err) + goto bad; + ceph_decode_need(p, end, sizeof(u32), bad); n = ceph_decode_32(p); err = -EINVAL; if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) @@ -745,7 +786,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) err = __insert_pg_mapping(pg, &map->pg_temp); if (err) goto bad; - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed, + len); } /* crush */ @@ -784,16 +826,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_fsid fsid; u32 epoch = 0; struct ceph_timespec modified; - u32 len, pool; - __s32 new_pool_max, new_flags, max; + s32 len; + u64 pool; + __s64 new_pool_max; + __s32 new_flags, max; void *start = *p; int err = -EINVAL; u16 version; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_INC_VERSION) { - pr_warning("got unknown v %d > %d of inc osdmap\n", version, - CEPH_OSDMAP_INC_VERSION); + if (version != 6) { + pr_warning("got unknown v %d != 6 of inc osdmap\n", version); goto bad; } @@ -803,7 +846,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, epoch = 
ceph_decode_32(p); BUG_ON(epoch != map->epoch+1); ceph_decode_copy(p, &modified, sizeof(modified)); - new_pool_max = ceph_decode_32(p); + new_pool_max = ceph_decode_64(p); new_flags = ceph_decode_32(p); /* full map? */ @@ -853,18 +896,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, /* new_pool */ ceph_decode_32_safe(p, end, len, bad); while (len--) { - __u8 ev; struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); - ceph_decode_need(p, end, 1 + sizeof(pi->v), bad); - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - err = -EINVAL; - goto bad; - } + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (!pi) { pi = kzalloc(sizeof(*pi), GFP_NOFS); @@ -890,7 +924,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) __remove_pg_pool(&map->pg_pools, pi); @@ -948,10 +982,12 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, int j; struct ceph_pg pgid; u32 pglen; - ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); - pglen = ceph_decode_32(p); + err = __decode_pgid(p, end, &pgid); + if (err) + goto bad; + ceph_decode_need(p, end, sizeof(u32), bad); + pglen = ceph_decode_32(p); if (pglen) { ceph_decode_need(p, end, pglen*sizeof(u32), bad); @@ -975,8 +1011,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, kfree(pg); goto bad; } - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, - pglen); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, + pgid.seed, pglen); } else { /* remove */ __remove_pg_mapping(&map->pg_temp, pgid); @@ -1010,7 +1046,7 @@ bad: * pass a stride back to the caller. 
*/ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *ono, u64 *oxoff, u64 *oxlen) { @@ -1021,7 +1057,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u32 su_per_object; u64 t, su_offset; - dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, + dout("mapping %llu~%llu osize %u fl_su %u\n", off, len, osize, su); if (su == 0 || sc == 0) goto invalid; @@ -1054,11 +1090,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, /* * Calculate the length of the extent being written to the selected - * object. This is the minimum of the full length requested (plen) or + * object. This is the minimum of the full length requested (len) or * the remainder of the current stripe being written to. */ - *oxlen = min_t(u64, *plen, su - su_offset); - *plen = *oxlen; + *oxlen = min_t(u64, len, su - su_offset); dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); return 0; @@ -1076,33 +1111,24 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping); * calculate an object layout (i.e. 
pgid) from an oid, * file_layout, and osdmap */ -int ceph_calc_object_layout(struct ceph_object_layout *ol, +int ceph_calc_object_layout(struct ceph_pg *pg, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap) { unsigned int num, num_mask; - struct ceph_pg pgid; - int poolid = le32_to_cpu(fl->fl_pg_pool); struct ceph_pg_pool_info *pool; - unsigned int ps; BUG_ON(!osdmap); - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + pg->pool = le32_to_cpu(fl->fl_pg_pool); + pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool); if (!pool) return -EIO; - ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); - num = le32_to_cpu(pool->v.pg_num); + pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); + num = pool->pg_num; num_mask = pool->pg_num_mask; - pgid.ps = cpu_to_le16(ps); - pgid.preferred = cpu_to_le16(-1); - pgid.pool = fl->fl_pg_pool; - dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps); - - ol->ol_pgid = pgid; - ol->ol_stripe_unit = fl->fl_object_stripe_unit; + dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed); return 0; } EXPORT_SYMBOL(ceph_calc_object_layout); @@ -1117,19 +1143,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, struct ceph_pg_mapping *pg; struct ceph_pg_pool_info *pool; int ruleno; - unsigned int poolid, ps, pps, t, r; + int r; + u32 pps; - poolid = le32_to_cpu(pgid.pool); - ps = le16_to_cpu(pgid.ps); - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); if (!pool) return NULL; /* pg_temp? 
*/ - t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), - pool->pgp_num_mask); - pgid.ps = cpu_to_le16(t); + pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, + pool->pgp_num_mask); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; @@ -1137,26 +1160,39 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* crush */ - ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, + pool->type, pool->size); if (ruleno < 0) { - pr_err("no crush rule pool %d ruleset %d type %d size %d\n", - poolid, pool->v.crush_ruleset, pool->v.type, - pool->v.size); + pr_err("no crush rule pool %lld ruleset %d type %d size %d\n", + pgid.pool, pool->crush_ruleset, pool->type, + pool->size); return NULL; } - pps = ceph_stable_mod(ps, - le32_to_cpu(pool->v.pgp_num), - pool->pgp_num_mask); - pps += poolid; + if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { + /* hash pool id and seed sothat pool PGs do not overlap */ + pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, + ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask), + pgid.pool); + } else { + /* + * legacy ehavior: add ps and pool together. this is + * not a great approach because the PGs from each pool + * will overlap on top of each other: 0.5 == 1.4 == + * 2.3 == ... 
+ */ + pps = ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask) + + (unsigned)pgid.pool; + } r = crush_do_rule(osdmap->crush, ruleno, pps, osds, - min_t(int, pool->v.size, *num), + min_t(int, pool->size, *num), osdmap->osd_weight); if (r < 0) { - pr_err("error %d from crush rule: pool %d ruleset %d type %d" - " size %d\n", r, poolid, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + pr_err("error %d from crush rule: pool %lld ruleset %d type %d" + " size %d\n", r, pgid.pool, pool->crush_ruleset, + pool->type, pool->size); return NULL; } *num = r; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index cd9c21d..815a224 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -12,7 +12,7 @@ /* * build a vector of user pages */ -struct page **ceph_get_direct_page_vector(const char __user *data, +struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page) { struct page **pages; @@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector); * copy user data into a page vector */ int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len) { int i = 0; @@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages, } EXPORT_SYMBOL(ceph_copy_user_to_page_vector); -int ceph_copy_to_page_vector(struct page **pages, - const char *data, +void ceph_copy_to_page_vector(struct page **pages, + const void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(page_address(pages[i]) + po, data, l); data += l; left -= l; @@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_to_page_vector); -int ceph_copy_from_page_vector(struct page **pages, - char *data, +void ceph_copy_from_page_vector(struct 
page **pages, + void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(data, page_address(pages[i]) + po, l); data += l; left -= l; @@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_from_page_vector); @@ -170,7 +168,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector); * copy user data from a page vector into a user pointer */ int ceph_copy_page_vector_to_user(struct page **pages, - char __user *data, + void __user *data, loff_t off, size_t len) { int i = 0; diff --git a/net/core/Makefile b/net/core/Makefile index 674641b..b33b996 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,10 +9,11 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o + sock_diag.o dev_ioctl.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o +obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 0337e2b..368f9c3 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, skb_queue_walk(queue, skb) { *peeked = skb->peeked; if (flags & MSG_PEEK) { - if (*off >= skb->len) { + if (*off >= skb->len && skb->len) { *off -= skb->len; continue; } diff --git a/net/core/dev.c b/net/core/dev.c index d0cbc93..b24ab0e9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -97,8 +97,6 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <linux/rtnetlink.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> #include <linux/stat.h> #include <net/dst.h> #include 
<net/pkt_sched.h> @@ -106,12 +104,10 @@ #include <net/xfrm.h> #include <linux/highmem.h> #include <linux/init.h> -#include <linux/kmod.h> #include <linux/module.h> #include <linux/netpoll.h> #include <linux/rcupdate.h> #include <linux/delay.h> -#include <net/wext.h> #include <net/iw_handler.h> #include <asm/current.h> #include <linux/audit.h> @@ -132,9 +128,7 @@ #include <linux/pci.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> -#include <linux/net_tstamp.h> #include <linux/static_key.h> -#include <net/flow_keys.h> #include "net-sysfs.h" @@ -144,41 +138,10 @@ /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) -/* - * The list of packet types we will receive (as opposed to discard) - * and the routines to invoke. - * - * Why 16. Because with 16 the only overlap we get on a hash of the - * low nibble of the protocol value is RARP/SNAP/X.25. - * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. 
- * --BLG - * - * 0800 IP - * 8100 802.1Q VLAN - * 0001 802.3 - * 0002 AX.25 - * 0004 802.2 - * 8035 RARP - * 0005 SNAP - * 0805 X.25 - * 0806 ARP - * 8137 IPX - * 0009 Localtalk - * 86DD IPv6 - */ - -#define PTYPE_HASH_SIZE (16) -#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) - static DEFINE_SPINLOCK(ptype_lock); static DEFINE_SPINLOCK(offload_lock); -static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; -static struct list_head ptype_all __read_mostly; /* Taps */ +struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; +struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; /* @@ -203,7 +166,7 @@ static struct list_head offload_base __read_mostly; DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); -DEFINE_SEQLOCK(devnet_rename_seq); +seqcount_t devnet_rename_seq; static inline void dev_base_seq_inc(struct net *net) { @@ -695,11 +658,10 @@ __setup("netdev=", netdev_boot_setup); struct net_device *__dev_get_by_name(struct net *net, const char *name) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each_entry(dev, p, head, name_hlist) + hlist_for_each_entry(dev, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; @@ -721,11 +683,10 @@ EXPORT_SYMBOL(__dev_get_by_name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each_entry_rcu(dev, p, head, name_hlist) + hlist_for_each_entry_rcu(dev, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; @@ -772,11 +733,10 @@ EXPORT_SYMBOL(dev_get_by_name); struct net_device *__dev_get_by_index(struct net *net, int ifindex) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each_entry(dev, p, head, index_hlist) + hlist_for_each_entry(dev, head, 
index_hlist) if (dev->ifindex == ifindex) return dev; @@ -797,11 +757,10 @@ EXPORT_SYMBOL(__dev_get_by_index); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each_entry_rcu(dev, p, head, index_hlist) + hlist_for_each_entry_rcu(dev, head, index_hlist) if (dev->ifindex == ifindex) return dev; @@ -1093,10 +1052,10 @@ int dev_change_name(struct net_device *dev, const char *newname) if (dev->flags & IFF_UP) return -EBUSY; - write_seqlock(&devnet_rename_seq); + write_seqcount_begin(&devnet_rename_seq); if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { - write_sequnlock(&devnet_rename_seq); + write_seqcount_end(&devnet_rename_seq); return 0; } @@ -1104,7 +1063,7 @@ int dev_change_name(struct net_device *dev, const char *newname) err = dev_get_valid_name(net, dev, newname); if (err < 0) { - write_sequnlock(&devnet_rename_seq); + write_seqcount_end(&devnet_rename_seq); return err; } @@ -1112,11 +1071,11 @@ rollback: ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); - write_sequnlock(&devnet_rename_seq); + write_seqcount_end(&devnet_rename_seq); return ret; } - write_sequnlock(&devnet_rename_seq); + write_seqcount_end(&devnet_rename_seq); write_lock_bh(&dev_base_lock); hlist_del_rcu(&dev->name_hlist); @@ -1135,7 +1094,7 @@ rollback: /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err = ret; - write_seqlock(&devnet_rename_seq); + write_seqcount_begin(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); goto rollback; } else { @@ -1227,36 +1186,6 @@ void netdev_notify_peers(struct net_device *dev) } EXPORT_SYMBOL(netdev_notify_peers); -/** - * dev_load - load a network module - * @net: the applicable net namespace - * @name: name of interface - * - * If a network interface is not present and the process has suitable - * privileges this function loads the 
module. If module loading is not - * available in this kernel then it becomes a nop. - */ - -void dev_load(struct net *net, const char *name) -{ - struct net_device *dev; - int no_module; - - rcu_read_lock(); - dev = dev_get_by_name_rcu(net, name); - rcu_read_unlock(); - - no_module = !dev; - if (no_module && capable(CAP_NET_ADMIN)) - no_module = request_module("netdev-%s", name); - if (no_module && capable(CAP_SYS_MODULE)) { - if (!request_module("%s", name)) - pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", - name); - } -} -EXPORT_SYMBOL(dev_load); - static int __dev_open(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1267,6 +1196,14 @@ static int __dev_open(struct net_device *dev) if (!netif_device_present(dev)) return -ENODEV; + /* Block netpoll from trying to do any rx path servicing. + * If we don't do this there is a chance ndo_poll_controller + * or ndo_poll may be running while we open the device + */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); if (ret) @@ -1280,6 +1217,8 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); + netpoll_rx_enable(dev); + if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { @@ -1371,9 +1310,16 @@ static int __dev_close(struct net_device *dev) int retval; LIST_HEAD(single); + /* Temporarily disable netpoll until the interface is down */ + retval = netpoll_rx_disable(dev); + if (retval) + return retval; + list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); return retval; } @@ -1409,14 +1355,22 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { + int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); + /* Block netpoll rx while the interface 
is going down */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + list_add(&dev->unreg_list, &single); dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); } - return 0; + return ret; } EXPORT_SYMBOL(dev_close); @@ -1591,7 +1545,6 @@ void net_enable_timestamp(void) return; } #endif - WARN_ON(in_interrupt()); static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1621,57 +1574,6 @@ static inline void net_timestamp_set(struct sk_buff *skb) __net_timestamp(SKB); \ } \ -static int net_hwtstamp_validate(struct ifreq *ifr) -{ - struct hwtstamp_config cfg; - enum hwtstamp_tx_types tx_type; - enum hwtstamp_rx_filters rx_filter; - int tx_type_valid = 0; - int rx_filter_valid = 0; - - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - if (cfg.flags) /* reserved for future extensions */ - return -EINVAL; - - tx_type = cfg.tx_type; - rx_filter = cfg.rx_filter; - - switch (tx_type) { - case HWTSTAMP_TX_OFF: - case HWTSTAMP_TX_ON: - case HWTSTAMP_TX_ONESTEP_SYNC: - tx_type_valid = 1; - break; - } - - switch (rx_filter) { - case HWTSTAMP_FILTER_NONE: - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - rx_filter_valid = 1; - break; - } - - if (!tx_type_valid || !rx_filter_valid) - return -ERANGE; - - return 0; -} - static inline bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) { @@ -1722,7 +1624,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_orphan(skb); - nf_reset(skb); 
if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); @@ -1738,6 +1639,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb->mark = 0; secpath_reset(skb); nf_reset(skb); + nf_reset_trace(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1857,6 +1759,230 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) } } +#ifdef CONFIG_XPS +static DEFINE_MUTEX(xps_map_mutex); +#define xmap_dereference(P) \ + rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) + +static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, + int cpu, u16 index) +{ + struct xps_map *map = NULL; + int pos; + + if (dev_maps) + map = xmap_dereference(dev_maps->cpu_map[cpu]); + + for (pos = 0; map && pos < map->len; pos++) { + if (map->queues[pos] == index) { + if (map->len > 1) { + map->queues[pos] = map->queues[--map->len]; + } else { + RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); + kfree_rcu(map, rcu); + map = NULL; + } + break; + } + } + + return map; +} + +static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) +{ + struct xps_dev_maps *dev_maps; + int cpu, i; + bool active = false; + + mutex_lock(&xps_map_mutex); + dev_maps = xmap_dereference(dev->xps_maps); + + if (!dev_maps) + goto out_no_maps; + + for_each_possible_cpu(cpu) { + for (i = index; i < dev->num_tx_queues; i++) { + if (!remove_xps_queue(dev_maps, cpu, i)) + break; + } + if (i == dev->num_tx_queues) + active = true; + } + + if (!active) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + kfree_rcu(dev_maps, rcu); + } + + for (i = index; i < dev->num_tx_queues; i++) + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), + NUMA_NO_NODE); + +out_no_maps: + mutex_unlock(&xps_map_mutex); +} + +static struct xps_map *expand_xps_map(struct xps_map *map, + int cpu, u16 index) +{ + struct xps_map *new_map; + int alloc_len = XPS_MIN_MAP_ALLOC; + int i, pos; + + for (pos = 0; map && pos < map->len; pos++) { + if 
(map->queues[pos] != index) + continue; + return map; + } + + /* Need to add queue to this CPU's existing map */ + if (map) { + if (pos < map->alloc_len) + return map; + + alloc_len = map->alloc_len * 2; + } + + /* Need to allocate new map to store queue on this CPU's map */ + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, + cpu_to_node(cpu)); + if (!new_map) + return NULL; + + for (i = 0; i < pos; i++) + new_map->queues[i] = map->queues[i]; + new_map->alloc_len = alloc_len; + new_map->len = pos; + + return new_map; +} + +int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) +{ + struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + struct xps_map *map, *new_map; + int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); + int cpu, numa_node_id = -2; + bool active = false; + + mutex_lock(&xps_map_mutex); + + dev_maps = xmap_dereference(dev->xps_maps); + + /* allocate memory for queue storage */ + for_each_online_cpu(cpu) { + if (!cpumask_test_cpu(cpu, mask)) + continue; + + if (!new_dev_maps) + new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); + if (!new_dev_maps) { + mutex_unlock(&xps_map_mutex); + return -ENOMEM; + } + + map = dev_maps ? 
xmap_dereference(dev_maps->cpu_map[cpu]) : + NULL; + + map = expand_xps_map(map, cpu, index); + if (!map) + goto error; + + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + } + + if (!new_dev_maps) + goto out_no_new_maps; + + for_each_possible_cpu(cpu) { + if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { + /* add queue to CPU maps */ + int pos = 0; + + map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + while ((pos < map->len) && (map->queues[pos] != index)) + pos++; + + if (pos == map->len) + map->queues[map->len++] = index; +#ifdef CONFIG_NUMA + if (numa_node_id == -2) + numa_node_id = cpu_to_node(cpu); + else if (numa_node_id != cpu_to_node(cpu)) + numa_node_id = -1; +#endif + } else if (dev_maps) { + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->cpu_map[cpu]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + } + + } + + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + + /* Cleanup old maps */ + if (dev_maps) { + for_each_possible_cpu(cpu) { + new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + map = xmap_dereference(dev_maps->cpu_map[cpu]); + if (map && map != new_map) + kfree_rcu(map, rcu); + } + + kfree_rcu(dev_maps, rcu); + } + + dev_maps = new_dev_maps; + active = true; + +out_no_new_maps: + /* update Tx queue numa node */ + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), + (numa_node_id >= 0) ? 
numa_node_id : + NUMA_NO_NODE); + + if (!dev_maps) + goto out_no_maps; + + /* removes queue from unused CPUs */ + for_each_possible_cpu(cpu) { + if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) + continue; + + if (remove_xps_queue(dev_maps, cpu, index)) + active = true; + } + + /* free map if not active */ + if (!active) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + kfree_rcu(dev_maps, rcu); + } + +out_no_maps: + mutex_unlock(&xps_map_mutex); + + return 0; +error: + /* remove any maps that we added */ + for_each_possible_cpu(cpu) { + new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : + NULL; + if (new_map && new_map != map) + kfree(new_map); + } + + mutex_unlock(&xps_map_mutex); + + kfree(new_dev_maps); + return -ENOMEM; +} +EXPORT_SYMBOL(netif_set_xps_queue); + +#endif /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. @@ -1880,8 +2006,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); - if (txq < dev->real_num_tx_queues) + if (txq < dev->real_num_tx_queues) { qdisc_reset_all_tx_gt(dev, txq); +#ifdef CONFIG_XPS + netif_reset_xps_queues_gt(dev, txq); +#endif + } } dev->real_num_tx_queues = txq; @@ -2018,6 +2148,9 @@ static void skb_warn_bad_offload(const struct sk_buff *skb) struct net_device *dev = skb->dev; const char *driver = ""; + if (!net_ratelimit()) + return; + if (dev && dev->dev.parent) driver = dev_driver_string(dev->dev.parent); @@ -2046,6 +2179,15 @@ int skb_checksum_help(struct sk_buff *skb) return -EINVAL; } + /* Before computing a checksum, we should make sure no frag could + * be modified by an external entity : checksum could be wrong. 
+ */ + if (skb_has_shared_frag(skb)) { + ret = __skb_linearize(skb); + if (ret) + goto out; + } + offset = skb_checksum_start_offset(skb); BUG_ON(offset >= skb_headlen(skb)); csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -2069,23 +2211,17 @@ out: EXPORT_SYMBOL(skb_checksum_help); /** - * skb_gso_segment - Perform segmentation on skb. + * skb_mac_gso_segment - mac layer segmentation handler. * @skb: buffer to segment * @features: features for the output path (see dev->features) - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; - int err; while (type == htons(ETH_P_8021Q)) { struct vlan_hdr *vh; @@ -2098,22 +2234,14 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, vlan_depth += VLAN_HLEN; } - skb_reset_mac_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - skb_warn_bad_offload(skb); - - if (skb_header_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) - return ERR_PTR(err); - } - rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + int err; + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -2131,7 +2259,50 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, return segs; } -EXPORT_SYMBOL(skb_gso_segment); +EXPORT_SYMBOL(skb_mac_gso_segment); + + +/* openvswitch calls this 
on rx path, so we need a different check. + */ +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL; + else + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + skb_warn_bad_offload(skb); + + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); + } + + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + return skb_mac_gso_segment(skb, features); +} +EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG @@ -2410,126 +2581,28 @@ out: return rc; } -static u32 hashrnd __read_mostly; - -/* - * Returns a Tx hash based on the given packet descriptor a Tx queues' number - * to be used as a distribution range. 
- */ -u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, - unsigned int num_tx_queues) -{ - u32 hash; - u16 qoffset = 0; - u16 qcount = num_tx_queues; - - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - while (unlikely(hash >= num_tx_queues)) - hash -= num_tx_queues; - return hash; - } - - if (dev->num_tc) { - u8 tc = netdev_get_prio_tc_map(dev, skb->priority); - qoffset = dev->tc_to_txq[tc].offset; - qcount = dev->tc_to_txq[tc].count; - } - - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol; - hash = jhash_1word(hash, hashrnd); - - return (u16) (((u64) hash * qcount) >> 32) + qoffset; -} -EXPORT_SYMBOL(__skb_tx_hash); - -static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) -{ - if (unlikely(queue_index >= dev->real_num_tx_queues)) { - net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", - dev->name, queue_index, - dev->real_num_tx_queues); - return 0; - } - return queue_index; -} - -static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +static void qdisc_pkt_len_init(struct sk_buff *skb) { -#ifdef CONFIG_XPS - struct xps_dev_maps *dev_maps; - struct xps_map *map; - int queue_index = -1; + const struct skb_shared_info *shinfo = skb_shinfo(skb); - rcu_read_lock(); - dev_maps = rcu_dereference(dev->xps_maps); - if (dev_maps) { - map = rcu_dereference( - dev_maps->cpu_map[raw_smp_processor_id()]); - if (map) { - if (map->len == 1) - queue_index = map->queues[0]; - else { - u32 hash; - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol ^ - skb->rxhash; - hash = jhash_1word(hash, hashrnd); - queue_index = map->queues[ - ((u64)hash * map->len) >> 32]; - } - if (unlikely(queue_index >= dev->real_num_tx_queues)) - queue_index = -1; - } - } - rcu_read_unlock(); - - return queue_index; -#else - return -1; -#endif -} - -struct netdev_queue 
*netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb) -{ - int queue_index; - const struct net_device_ops *ops = dev->netdev_ops; - - if (dev->real_num_tx_queues == 1) - queue_index = 0; - else if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb); - queue_index = dev_cap_txqueue(dev, queue_index); - } else { - struct sock *sk = skb->sk; - queue_index = sk_tx_queue_get(sk); - - if (queue_index < 0 || skb->ooo_okay || - queue_index >= dev->real_num_tx_queues) { - int old_index = queue_index; + qdisc_skb_cb(skb)->pkt_len = skb->len; - queue_index = get_xps_queue(dev, skb); - if (queue_index < 0) - queue_index = skb_tx_hash(dev, skb); + /* To get more precise estimation of bytes sent on wire, + * we add to pkt_len the headers size of all segments + */ + if (shinfo->gso_size) { + unsigned int hdr_len; - if (queue_index != old_index && sk) { - struct dst_entry *dst = - rcu_dereference_check(sk->sk_dst_cache, 1); + /* mac layer + network layer */ + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - if (dst && skb_dst(skb) == dst) - sk_tx_queue_set(sk, queue_index); - } - } + /* + transport layer */ + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); + qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; } - - skb_set_queue_mapping(skb, queue_index); - return netdev_get_tx_queue(dev, queue_index); } static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, @@ -2540,7 +2613,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, bool contended; int rc; - qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_pkt_len_init(skb); qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a @@ -2663,6 +2736,8 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; + skb_reset_mac_header(skb); + /* Disable soft irqs for various locks below. 
Also * stops preemption for RCU. */ @@ -2757,41 +2832,6 @@ static inline void ____napi_schedule(struct softnet_data *sd, __raise_softirq_irqoff(NET_RX_SOFTIRQ); } -/* - * __skb_get_rxhash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Sets rxhash in skb to non-zero hash value - * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb - * if hash is a canonical 4-tuple hash over transport ports. - */ -void __skb_get_rxhash(struct sk_buff *skb) -{ - struct flow_keys keys; - u32 hash; - - if (!skb_flow_dissect(skb, &keys)) - return; - - if (keys.ports) - skb->l4_rxhash = 1; - - /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys.dst < (__force u32)keys.src) || - (((__force u32)keys.dst == (__force u32)keys.src) && - ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { - swap(keys.dst, keys.src); - swap(keys.port16[0], keys.port16[1]); - } - - hash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports, hashrnd); - if (!hash) - hash = 1; - - skb->rxhash = hash; -} -EXPORT_SYMBOL(__skb_get_rxhash); - #ifdef CONFIG_RPS /* One global table that all flow-based protocols share. */ @@ -3277,6 +3317,7 @@ int netdev_rx_handler_register(struct net_device *dev, if (dev->rx_handler) return -EBUSY; + /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); @@ -3297,6 +3338,11 @@ void netdev_rx_handler_unregister(struct net_device *dev) ASSERT_RTNL(); RCU_INIT_POINTER(dev->rx_handler, NULL); + /* a reader seeing a non NULL rx_handler in a rcu_read_lock() + * section has a guarantee to see a non NULL rx_handler_data + * as well. 
+ */ + synchronize_net(); RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); @@ -3318,7 +3364,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) } } -static int __netif_receive_skb(struct sk_buff *skb) +static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; @@ -3327,24 +3373,11 @@ static int __netif_receive_skb(struct sk_buff *skb) bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; - unsigned long pflags = current->flags; net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); - /* - * PFMEMALLOC skbs are special, they should - * - be delivered to SOCK_MEMALLOC sockets only - * - stay away from userspace - * - have bounded memory usage - * - * Use PF_MEMALLOC as this saves us from propagating the allocation - * context down to all allocation sites. - */ - if (sk_memalloc_socks() && skb_pfmemalloc(skb)) - current->flags |= PF_MEMALLOC; - /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) goto out; @@ -3352,7 +3385,8 @@ static int __netif_receive_skb(struct sk_buff *skb) orig_dev = skb->dev; skb_reset_network_header(skb); - skb_reset_transport_header(skb); + if (!skb_transport_header_was_set(skb)) + skb_reset_transport_header(skb); skb_reset_mac_len(skb); pt_prev = NULL; @@ -3377,7 +3411,7 @@ another_round: } #endif - if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + if (pfmemalloc) goto skip_taps; list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -3396,8 +3430,7 @@ skip_taps: ncls: #endif - if (sk_memalloc_socks() && skb_pfmemalloc(skb) - && !skb_pfmemalloc_protocol(skb)) + if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; if (vlan_tx_tag_present(skb)) { @@ -3419,6 +3452,7 @@ ncls: } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: + ret = NET_RX_SUCCESS; goto unlock; case RX_HANDLER_ANOTHER: goto another_round; @@ -3467,7 +3501,31 
@@ drop: unlock: rcu_read_unlock(); out: - tsk_restore_flags(current, pflags, PF_MEMALLOC); + return ret; +} + +static int __netif_receive_skb(struct sk_buff *skb) +{ + int ret; + + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) { + unsigned long pflags = current->flags; + + /* + * PFMEMALLOC skbs are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as this saves us from propagating the allocation + * context down to all allocation sites. + */ + current->flags |= PF_MEMALLOC; + ret = __netif_receive_skb_core(skb, true); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + } else + ret = __netif_receive_skb_core(skb, false); + return ret; } @@ -3634,7 +3692,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff __be16 type = skb->protocol; struct list_head *head = &offload_base; int same_flow; - int mac_len; enum gro_result ret; if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) @@ -3651,8 +3708,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff continue; skb_set_network_header(skb, skb_gro_offset(skb)); - mac_len = skb->network_header - skb->mac_header; - skb->mac_len = mac_len; + skb_reset_mac_len(skb); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; @@ -4056,7 +4112,7 @@ static void net_rx_action(struct softirq_action *h) * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. 
*/ - if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) + if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) goto softnet_break; local_irq_enable(); @@ -4134,530 +4190,231 @@ softnet_break: goto out; } -static gifconf_func_t *gifconf_list[NPROTO]; - -/** - * register_gifconf - register a SIOCGIF handler - * @family: Address family - * @gifconf: Function handler - * - * Register protocol dependent address dumping routines. The handler - * that is passed must not be freed or reused until it has been replaced - * by another handler. - */ -int register_gifconf(unsigned int family, gifconf_func_t *gifconf) -{ - if (family >= NPROTO) - return -EINVAL; - gifconf_list[family] = gifconf; - return 0; -} -EXPORT_SYMBOL(register_gifconf); - - -/* - * Map an interface index to its name (SIOCGIFNAME) - */ - -/* - * We need this ioctl for efficient implementation of the - * if_indextoname() function required by the IPv6 API. Without - * it, we would have to search all the interfaces to find a - * match. --pb - */ - -static int dev_ifname(struct net *net, struct ifreq __user *arg) -{ +struct netdev_upper { struct net_device *dev; - struct ifreq ifr; - unsigned seq; - - /* - * Fetch the caller's info block. 
- */ + bool master; + struct list_head list; + struct rcu_head rcu; + struct list_head search_list; +}; - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; +static void __append_search_uppers(struct list_head *search_list, + struct net_device *dev) +{ + struct netdev_upper *upper; -retry: - seq = read_seqbegin(&devnet_rename_seq); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); - if (!dev) { - rcu_read_unlock(); - return -ENODEV; + list_for_each_entry(upper, &dev->upper_dev_list, list) { + /* check if this upper is not already in search list */ + if (list_empty(&upper->search_list)) + list_add_tail(&upper->search_list, search_list); } - - strcpy(ifr.ifr_name, dev->name); - rcu_read_unlock(); - if (read_seqretry(&devnet_rename_seq, seq)) - goto retry; - - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; } -/* - * Perform a SIOCGIFCONF call. This structure will change - * size eventually, and there is nothing I can do about it. - * Thus we will need a 'compatibility mode'. - */ - -static int dev_ifconf(struct net *net, char __user *arg) +static bool __netdev_search_upper_dev(struct net_device *dev, + struct net_device *upper_dev) { - struct ifconf ifc; - struct net_device *dev; - char __user *pos; - int len; - int total; - int i; - - /* - * Fetch the caller's info block. - */ + LIST_HEAD(search_list); + struct netdev_upper *upper; + struct netdev_upper *tmp; + bool ret = false; - if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) - return -EFAULT; - - pos = ifc.ifc_buf; - len = ifc.ifc_len; - - /* - * Loop over the interfaces, and write an info block for each. 
- */ - - total = 0; - for_each_netdev(net, dev) { - for (i = 0; i < NPROTO; i++) { - if (gifconf_list[i]) { - int done; - if (!pos) - done = gifconf_list[i](dev, NULL, 0); - else - done = gifconf_list[i](dev, pos + total, - len - total); - if (done < 0) - return -EFAULT; - total += done; - } + __append_search_uppers(&search_list, dev); + list_for_each_entry(upper, &search_list, search_list) { + if (upper->dev == upper_dev) { + ret = true; + break; } + __append_search_uppers(&search_list, upper->dev); } - - /* - * All done. Write the updated control block back to the caller. - */ - ifc.ifc_len = total; - - /* - * Both BSD and Solaris return 0 here, so we do too. - */ - return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; + list_for_each_entry_safe(upper, tmp, &search_list, search_list) + INIT_LIST_HEAD(&upper->search_list); + return ret; } -#ifdef CONFIG_PROC_FS - -#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) - -#define get_bucket(x) ((x) >> BUCKET_SPACE) -#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) -#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) - -static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) +static struct netdev_upper *__netdev_find_upper(struct net_device *dev, + struct net_device *upper_dev) { - struct net *net = seq_file_net(seq); - struct net_device *dev; - struct hlist_node *p; - struct hlist_head *h; - unsigned int count = 0, offset = get_offset(*pos); + struct netdev_upper *upper; - h = &net->dev_name_head[get_bucket(*pos)]; - hlist_for_each_entry_rcu(dev, p, h, name_hlist) { - if (++count == offset) - return dev; + list_for_each_entry(upper, &dev->upper_dev_list, list) { + if (upper->dev == upper_dev) + return upper; } - return NULL; } -static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) -{ - struct net_device *dev; - unsigned int bucket; - - do { - dev = dev_from_same_bucket(seq, pos); - if (dev) - return dev; - - bucket = 
get_bucket(*pos) + 1; - *pos = set_bucket_offset(bucket, 1); - } while (bucket < NETDEV_HASHENTRIES); - - return NULL; -} - -/* - * This is invoked by the /proc filesystem handler to display a device - * in detail. +/** + * netdev_has_upper_dev - Check if device is linked to an upper device + * @dev: device + * @upper_dev: upper device to check + * + * Find out if a device is linked to specified upper device and return true + * in case it is. Note that this checks only immediate upper device, + * not through a complete stack of devices. The caller must hold the RTNL lock. */ -void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - if (!*pos) - return SEQ_START_TOKEN; - - if (get_bucket(*pos) >= NETDEV_HASHENTRIES) - return NULL; - - return dev_from_bucket(seq, pos); -} - -void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return dev_from_bucket(seq, pos); -} - -void dev_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) +bool netdev_has_upper_dev(struct net_device *dev, + struct net_device *upper_dev) { - rcu_read_unlock(); -} - -static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) -{ - struct rtnl_link_stats64 temp; - const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + ASSERT_RTNL(); - seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " - "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", - dev->name, stats->rx_bytes, stats->rx_packets, - stats->rx_errors, - stats->rx_dropped + stats->rx_missed_errors, - stats->rx_fifo_errors, - stats->rx_length_errors + stats->rx_over_errors + - stats->rx_crc_errors + stats->rx_frame_errors, - stats->rx_compressed, stats->multicast, - stats->tx_bytes, stats->tx_packets, - stats->tx_errors, stats->tx_dropped, - stats->tx_fifo_errors, stats->collisions, - stats->tx_carrier_errors + - stats->tx_aborted_errors + - stats->tx_window_errors + - stats->tx_heartbeat_errors, - stats->tx_compressed); 
+ return __netdev_find_upper(dev, upper_dev); } +EXPORT_SYMBOL(netdev_has_upper_dev); -/* - * Called from the PROCfs module. This now uses the new arbitrary sized - * /proc/net interface to create /proc/net/dev +/** + * netdev_has_any_upper_dev - Check if device is linked to some device + * @dev: device + * + * Find out if a device is linked to an upper device and return true in case + * it is. The caller must hold the RTNL lock. */ -static int dev_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Inter-| Receive " - " | Transmit\n" - " face |bytes packets errs drop fifo frame " - "compressed multicast|bytes packets errs " - "drop fifo colls carrier compressed\n"); - else - dev_seq_printf_stats(seq, v); - return 0; -} - -static struct softnet_data *softnet_get_online(loff_t *pos) +bool netdev_has_any_upper_dev(struct net_device *dev) { - struct softnet_data *sd = NULL; - - while (*pos < nr_cpu_ids) - if (cpu_online(*pos)) { - sd = &per_cpu(softnet_data, *pos); - break; - } else - ++*pos; - return sd; -} - -static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) -{ - return softnet_get_online(pos); -} - -static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return softnet_get_online(pos); -} - -static void softnet_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int softnet_seq_show(struct seq_file *seq, void *v) -{ - struct softnet_data *sd = v; + ASSERT_RTNL(); - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - sd->processed, sd->dropped, sd->time_squeeze, 0, - 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps); - return 0; + return !list_empty(&dev->upper_dev_list); } +EXPORT_SYMBOL(netdev_has_any_upper_dev); -static const struct seq_operations dev_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_seq_show, -}; - -static int dev_seq_open(struct inode *inode, struct file *file) +/** 
+ * netdev_master_upper_dev_get - Get master upper device + * @dev: device + * + * Find a master upper device and return pointer to it or NULL in case + * it's not there. The caller must hold the RTNL lock. + */ +struct net_device *netdev_master_upper_dev_get(struct net_device *dev) { - return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); -} + struct netdev_upper *upper; -static const struct file_operations dev_seq_fops = { - .owner = THIS_MODULE, - .open = dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; + ASSERT_RTNL(); -static const struct seq_operations softnet_seq_ops = { - .start = softnet_seq_start, - .next = softnet_seq_next, - .stop = softnet_seq_stop, - .show = softnet_seq_show, -}; + if (list_empty(&dev->upper_dev_list)) + return NULL; -static int softnet_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &softnet_seq_ops); + upper = list_first_entry(&dev->upper_dev_list, + struct netdev_upper, list); + if (likely(upper->master)) + return upper->dev; + return NULL; } +EXPORT_SYMBOL(netdev_master_upper_dev_get); -static const struct file_operations softnet_seq_fops = { - .owner = THIS_MODULE, - .open = softnet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static void *ptype_get_idx(loff_t pos) +/** + * netdev_master_upper_dev_get_rcu - Get master upper device + * @dev: device + * + * Find a master upper device and return pointer to it or NULL in case + * it's not there. The caller must hold the RCU read lock. 
+ */ +struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) { - struct packet_type *pt = NULL; - loff_t i = 0; - int t; + struct netdev_upper *upper; - list_for_each_entry_rcu(pt, &ptype_all, list) { - if (i == pos) - return pt; - ++i; - } - - for (t = 0; t < PTYPE_HASH_SIZE; t++) { - list_for_each_entry_rcu(pt, &ptype_base[t], list) { - if (i == pos) - return pt; - ++i; - } - } + upper = list_first_or_null_rcu(&dev->upper_dev_list, + struct netdev_upper, list); + if (upper && likely(upper->master)) + return upper->dev; return NULL; } +EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); -static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) +static int __netdev_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev, bool master) { - rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; -} + struct netdev_upper *upper; -static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct packet_type *pt; - struct list_head *nxt; - int hash; + ASSERT_RTNL(); - ++*pos; - if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + if (dev == upper_dev) + return -EBUSY; - pt = v; - nxt = pt->list.next; - if (pt->type == htons(ETH_P_ALL)) { - if (nxt != &ptype_all) - goto found; - hash = 0; - nxt = ptype_base[0].next; - } else - hash = ntohs(pt->type) & PTYPE_HASH_MASK; + /* To prevent loops, check if dev is not upper device to upper_dev. 
*/ + if (__netdev_search_upper_dev(upper_dev, dev)) + return -EBUSY; - while (nxt == &ptype_base[hash]) { - if (++hash >= PTYPE_HASH_SIZE) - return NULL; - nxt = ptype_base[hash].next; - } -found: - return list_entry(nxt, struct packet_type, list); -} + if (__netdev_find_upper(dev, upper_dev)) + return -EEXIST; -static void ptype_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} + if (master && netdev_master_upper_dev_get(dev)) + return -EBUSY; -static int ptype_seq_show(struct seq_file *seq, void *v) -{ - struct packet_type *pt = v; + upper = kmalloc(sizeof(*upper), GFP_KERNEL); + if (!upper) + return -ENOMEM; - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { - if (pt->type == htons(ETH_P_ALL)) - seq_puts(seq, "ALL "); - else - seq_printf(seq, "%04x", ntohs(pt->type)); + upper->dev = upper_dev; + upper->master = master; + INIT_LIST_HEAD(&upper->search_list); - seq_printf(seq, " %-8s %pF\n", - pt->dev ? pt->dev->name : "", pt->func); - } + /* Ensure that master upper link is always the first item in list. 
*/ + if (master) + list_add_rcu(&upper->list, &dev->upper_dev_list); + else + list_add_tail_rcu(&upper->list, &dev->upper_dev_list); + dev_hold(upper_dev); return 0; } -static const struct seq_operations ptype_seq_ops = { - .start = ptype_seq_start, - .next = ptype_seq_next, - .stop = ptype_seq_stop, - .show = ptype_seq_show, -}; - -static int ptype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ptype_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ptype_seq_fops = { - .owner = THIS_MODULE, - .open = ptype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - - -static int __net_init dev_proc_net_init(struct net *net) -{ - int rc = -ENOMEM; - - if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) - goto out; - if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) - goto out_dev; - if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) - goto out_softnet; - - if (wext_proc_init(net)) - goto out_ptype; - rc = 0; -out: - return rc; -out_ptype: - proc_net_remove(net, "ptype"); -out_softnet: - proc_net_remove(net, "softnet_stat"); -out_dev: - proc_net_remove(net, "dev"); - goto out; -} - -static void __net_exit dev_proc_net_exit(struct net *net) -{ - wext_proc_exit(net); - - proc_net_remove(net, "ptype"); - proc_net_remove(net, "softnet_stat"); - proc_net_remove(net, "dev"); -} - -static struct pernet_operations __net_initdata dev_proc_ops = { - .init = dev_proc_net_init, - .exit = dev_proc_net_exit, -}; - -static int __init dev_proc_init(void) +/** + * netdev_upper_dev_link - Add a link to the upper device + * @dev: device + * @upper_dev: new upper device + * + * Adds a link to device which is upper to this one. The caller must hold + * the RTNL lock. On a failure a negative errno code is returned. + * On success the reference counts are adjusted and the function + * returns zero. 
+ */ +int netdev_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev) { - return register_pernet_subsys(&dev_proc_ops); + return __netdev_upper_dev_link(dev, upper_dev, false); } -#else -#define dev_proc_init() 0 -#endif /* CONFIG_PROC_FS */ - +EXPORT_SYMBOL(netdev_upper_dev_link); /** - * netdev_set_master - set up master pointer - * @slave: slave device - * @master: new master device + * netdev_master_upper_dev_link - Add a master link to the upper device + * @dev: device + * @upper_dev: new upper device * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. On a failure - * a negative errno code is returned. On success the reference counts - * are adjusted and the function returns zero. + * Adds a link to device which is upper to this one. In this case, only + * one master upper device can be linked, although other non-master devices + * might be linked as well. The caller must hold the RTNL lock. + * On a failure a negative errno code is returned. On success the reference + * counts are adjusted and the function returns zero. */ -int netdev_set_master(struct net_device *slave, struct net_device *master) +int netdev_master_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev) { - struct net_device *old = slave->master; - - ASSERT_RTNL(); - - if (master) { - if (old) - return -EBUSY; - dev_hold(master); - } - - slave->master = master; - - if (old) - dev_put(old); - return 0; + return __netdev_upper_dev_link(dev, upper_dev, true); } -EXPORT_SYMBOL(netdev_set_master); +EXPORT_SYMBOL(netdev_master_upper_dev_link); /** - * netdev_set_bond_master - set up bonding master/slave pair - * @slave: slave device - * @master: new master device + * netdev_upper_dev_unlink - Removes a link to upper device + * @dev: device + * @upper_dev: new upper device * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. 
On a failure - * a negative errno code is returned. On success %RTM_NEWLINK is sent - * to the routing socket and the function returns zero. + * Removes a link to device which is upper to this one. The caller must hold + * the RTNL lock. */ -int netdev_set_bond_master(struct net_device *slave, struct net_device *master) +void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { - int err; + struct netdev_upper *upper; ASSERT_RTNL(); - err = netdev_set_master(slave, master); - if (err) - return err; - if (master) - slave->flags |= IFF_SLAVE; - else - slave->flags &= ~IFF_SLAVE; - - rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); - return 0; + upper = __netdev_find_upper(dev, upper_dev); + if (!upper) + return; + list_del_rcu(&upper->list); + dev_put(upper_dev); + kfree_rcu(upper, rcu); } -EXPORT_SYMBOL(netdev_set_bond_master); +EXPORT_SYMBOL(netdev_upper_dev_unlink); static void dev_change_rx_flags(struct net_device *dev, int flags) { @@ -5020,381 +4777,33 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) if (!netif_device_present(dev)) return -ENODEV; err = ops->ndo_set_mac_address(dev, sa); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + if (err) + return err; + dev->addr_assign_type = NET_ADDR_SET; + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); add_device_randomness(dev->dev_addr, dev->addr_len); - return err; + return 0; } EXPORT_SYMBOL(dev_set_mac_address); -/* - * Perform the SIOCxIFxxx calls, inside rcu_read_lock() - */ -static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); - - if (!dev) - return -ENODEV; - - switch (cmd) { - case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (short) dev_get_flags(dev); - return 0; - - case SIOCGIFMETRIC: /* Get the metric on the interface - (currently unused) */ - ifr->ifr_metric = 0; - return 0; - - case SIOCGIFMTU: /* Get the MTU 
of a device */ - ifr->ifr_mtu = dev->mtu; - return 0; - - case SIOCGIFHWADDR: - if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); - else - memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - ifr->ifr_hwaddr.sa_family = dev->type; - return 0; - - case SIOCGIFSLAVE: - err = -EINVAL; - break; - - case SIOCGIFMAP: - ifr->ifr_map.mem_start = dev->mem_start; - ifr->ifr_map.mem_end = dev->mem_end; - ifr->ifr_map.base_addr = dev->base_addr; - ifr->ifr_map.irq = dev->irq; - ifr->ifr_map.dma = dev->dma; - ifr->ifr_map.port = dev->if_port; - return 0; - - case SIOCGIFINDEX: - ifr->ifr_ifindex = dev->ifindex; - return 0; - - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; - - default: - /* dev_ioctl() should ensure this case - * is never reached - */ - WARN_ON(1); - err = -ENOTTY; - break; - - } - return err; -} - -/* - * Perform the SIOCxIFxxx calls, inside rtnl_lock() - */ -static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - const struct net_device_ops *ops; - - if (!dev) - return -ENODEV; - - ops = dev->netdev_ops; - - switch (cmd) { - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); - - case SIOCSIFMETRIC: /* Set the metric on the interface - (currently unused) */ - return -EOPNOTSUPP; - - case SIOCSIFMTU: /* Set the MTU of a device */ - return dev_set_mtu(dev, ifr->ifr_mtu); - - case SIOCSIFHWADDR: - return dev_set_mac_address(dev, &ifr->ifr_hwaddr); - - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return 0; - - case SIOCSIFMAP: - if (ops->ndo_set_config) { - if (!netif_device_present(dev)) - 
return -ENODEV; - return ops->ndo_set_config(dev, &ifr->ifr_map); - } - return -EOPNOTSUPP; - - case SIOCADDMULTI: - if (!ops->ndo_set_rx_mode || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); - - case SIOCDELMULTI: - if (!ops->ndo_set_rx_mode || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); - - case SIOCSIFTXQLEN: - if (ifr->ifr_qlen < 0) - return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; - return 0; - - case SIOCSIFNAME: - ifr->ifr_newname[IFNAMSIZ-1] = '\0'; - return dev_change_name(dev, ifr->ifr_newname); - - case SIOCSHWTSTAMP: - err = net_hwtstamp_validate(ifr); - if (err) - return err; - /* fall through */ - - /* - * Unknown or private ioctl - */ - default: - if ((cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) || - cmd == SIOCBONDENSLAVE || - cmd == SIOCBONDRELEASE || - cmd == SIOCBONDSETHWADDR || - cmd == SIOCBONDSLAVEINFOQUERY || - cmd == SIOCBONDINFOQUERY || - cmd == SIOCBONDCHANGEACTIVE || - cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG || - cmd == SIOCBRADDIF || - cmd == SIOCBRDELIF || - cmd == SIOCSHWTSTAMP || - cmd == SIOCWANDEV) { - err = -EOPNOTSUPP; - if (ops->ndo_do_ioctl) { - if (netif_device_present(dev)) - err = ops->ndo_do_ioctl(dev, ifr, cmd); - else - err = -ENODEV; - } - } else - err = -EINVAL; - - } - return err; -} - -/* - * This function handles all "interface"-type I/O control requests. The actual - * 'doing' part of this is dev_ifsioc above. - */ - /** - * dev_ioctl - network device ioctl - * @net: the applicable net namespace - * @cmd: command to issue - * @arg: pointer to a struct ifreq in user space + * dev_change_carrier - Change device carrier + * @dev: device + * @new_carrier: new value * - * Issue ioctl functions to devices. 
This is normally called by the - * user space syscall interfaces but can sometimes be useful for - * other purposes. The return value is the return from the syscall if - * positive or a negative errno code on error. + * Change device carrier */ - -int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int dev_change_carrier(struct net_device *dev, bool new_carrier) { - struct ifreq ifr; - int ret; - char *colon; - - /* One special case: SIOCGIFCONF takes ifconf argument - and requires shared lock, because it sleeps writing - to user space. - */ - - if (cmd == SIOCGIFCONF) { - rtnl_lock(); - ret = dev_ifconf(net, (char __user *) arg); - rtnl_unlock(); - return ret; - } - if (cmd == SIOCGIFNAME) - return dev_ifname(net, (struct ifreq __user *)arg); - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; - - colon = strchr(ifr.ifr_name, ':'); - if (colon) - *colon = 0; - - /* - * See which interface the caller is talking about. - */ - - switch (cmd) { - /* - * These ioctl calls: - * - can be done by all. - * - atomic and do not require locking. - * - return a value - */ - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFHWADDR: - case SIOCGIFSLAVE: - case SIOCGIFMAP: - case SIOCGIFINDEX: - case SIOCGIFTXQLEN: - dev_load(net, ifr.ifr_name); - rcu_read_lock(); - ret = dev_ifsioc_locked(net, &ifr, cmd); - rcu_read_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - case SIOCETHTOOL: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ethtool(net, &ifr); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. 
- * - return a value - */ - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCSIFNAME: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - do not return a value - */ - case SIOCSIFMAP: - case SIOCSIFTXQLEN: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* fall through */ - /* - * These ioctl calls: - * - require local superuser power. - * - require strict serialization. - * - do not return a value - */ - case SIOCSIFFLAGS: - case SIOCSIFMETRIC: - case SIOCSIFMTU: - case SIOCSIFHWADDR: - case SIOCSIFSLAVE: - case SIOCADDMULTI: - case SIOCDELMULTI: - case SIOCSIFHWBROADCAST: - case SIOCSMIIREG: - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - case SIOCBRADDIF: - case SIOCBRDELIF: - case SIOCSHWTSTAMP: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - /* fall through */ - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - return ret; - - case SIOCGIFMEM: - /* Get the per device memory space. We can add this but - * currently do not support it */ - case SIOCSIFMEM: - /* Set the per device memory buffer space. - * Not applicable in our case */ - case SIOCSIFLINK: - return -ENOTTY; + const struct net_device_ops *ops = dev->netdev_ops; - /* - * Unknown or private ioctl. 
- */ - default: - if (cmd == SIOCWANDEV || - (cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret && copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - return ret; - } - /* Take care of Wireless Extensions */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(net, &ifr, cmd, arg); - return -ENOTTY; - } + if (!ops->ndo_change_carrier) + return -EOPNOTSUPP; + if (!netif_device_present(dev)) + return -ENODEV; + return ops->ndo_change_carrier(dev, new_carrier); } - +EXPORT_SYMBOL(dev_change_carrier); /** * dev_new_index - allocate an ifindex @@ -5482,11 +4891,15 @@ static void rollback_registered_many(struct list_head *head) if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); - /* Notifier chain MUST detach us from master device. */ - WARN_ON(dev->master); + /* Notifier chain MUST detach us all upper devices. */ + WARN_ON(netdev_has_any_upper_dev(dev)); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); +#ifdef CONFIG_XPS + /* Remove XPS queueing entries */ + netif_reset_xps_queues_gt(dev, 0); +#endif } synchronize_net(); @@ -5664,10 +5077,9 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); - if (!rx) { - pr_err("netdev: Unable to allocate %u rx queues\n", count); + if (!rx) return -ENOMEM; - } + dev->_rx = rx; for (i = 0; i < count; i++) @@ -5698,10 +5110,9 @@ static int netif_alloc_netdev_queues(struct net_device *dev) BUG_ON(count < 1); tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); - if (!tx) { - pr_err("netdev: Unable to allocate %u tx queues\n", count); + if (!tx) return -ENOMEM; - } + dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); @@ -5760,6 +5171,14 @@ int register_netdevice(struct net_device *dev) } } + if (((dev->hw_features | 
dev->features) & NETIF_F_HW_VLAN_FILTER) && + (!dev->netdev_ops->ndo_vlan_rx_add_vid || + !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { + netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); + ret = -EINVAL; + goto err_uninit; + } + ret = -EBUSY; if (!dev->ifindex) dev->ifindex = dev_new_index(net); @@ -5815,6 +5234,13 @@ int register_netdevice(struct net_device *dev) list_netdevice(dev); add_device_randomness(dev->dev_addr, dev->addr_len); + /* If the device has permanent device address, driver should + * set dev_addr and also addr_assign_type should be set to + * NET_ADDR_PERM (default value). + */ + if (dev->addr_assign_type == NET_ADDR_PERM) + memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); + /* Notify protocols, that a new device appeared. */ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); @@ -6121,6 +5547,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) static const struct ethtool_ops default_ethtool_ops; +void netdev_set_default_ethtool_ops(struct net_device *dev, + const struct ethtool_ops *ops) +{ + if (dev->ethtool_ops == &default_ethtool_ops) + dev->ethtool_ops = ops; +} +EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -6165,10 +5599,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, alloc_size += NETDEV_ALIGN - 1; p = kzalloc(alloc_size, GFP_KERNEL); - if (!p) { - pr_err("alloc_netdev: Unable to allocate device\n"); + if (!p) return NULL; - } dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; @@ -6191,6 +5623,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); INIT_LIST_HEAD(&dev->link_watch_list); + INIT_LIST_HEAD(&dev->upper_dev_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); @@ -6834,19 +6267,9 @@ static int __init 
net_dev_init(void) hotcpu_notifier(dev_cpu_callback, 0); dst_init(); - dev_mcast_init(); rc = 0; out: return rc; } subsys_initcall(net_dev_init); - -static int __init initialize_hashrnd(void) -{ - get_random_bytes(&hashrnd, sizeof(hashrnd)); - return 0; -} - -late_initcall_sync(initialize_hashrnd); - diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index b079c7b..abdc9e6 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -15,7 +15,6 @@ #include <linux/rtnetlink.h> #include <linux/export.h> #include <linux/list.h> -#include <linux/proc_fs.h> /* * General list handling functions @@ -38,7 +37,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->type = addr_type; ha->refcount = 1; ha->global_use = global; - ha->synced = false; + ha->synced = 0; list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -166,7 +165,7 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, addr_len, ha->type); if (err) break; - ha->synced = true; + ha->synced++; ha->refcount++; } else if (ha->refcount == 1) { __hw_addr_del(to_list, ha->addr, addr_len, ha->type); @@ -187,7 +186,7 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, if (ha->synced) { __hw_addr_del(to_list, ha->addr, addr_len, ha->type); - ha->synced = false; + ha->synced--; __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } @@ -727,76 +726,3 @@ void dev_mc_init(struct net_device *dev) __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init); - -#ifdef CONFIG_PROC_FS -#include <linux/seq_file.h> - -static int dev_mc_seq_show(struct seq_file *seq, void *v) -{ - struct netdev_hw_addr *ha; - struct net_device *dev = v; - - if (v == SEQ_START_TOKEN) - return 0; - - netif_addr_lock_bh(dev); - netdev_for_each_mc_addr(ha, dev) { - int i; - - seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, - dev->name, ha->refcount, ha->global_use); - - for (i = 0; i < dev->addr_len; i++) - seq_printf(seq, "%02x", ha->addr[i]); - - seq_putc(seq, '\n'); - } - 
netif_addr_unlock_bh(dev); - return 0; -} - -static const struct seq_operations dev_mc_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_mc_seq_show, -}; - -static int dev_mc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_mc_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_mc_seq_fops = { - .owner = THIS_MODULE, - .open = dev_mc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -#endif - -static int __net_init dev_mc_net_init(struct net *net) -{ - if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) - return -ENOMEM; - return 0; -} - -static void __net_exit dev_mc_net_exit(struct net *net) -{ - proc_net_remove(net, "dev_mcast"); -} - -static struct pernet_operations __net_initdata dev_mc_net_ops = { - .init = dev_mc_net_init, - .exit = dev_mc_net_exit, -}; - -void __init dev_mcast_init(void) -{ - register_pernet_subsys(&dev_mc_net_ops); -} - diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c new file mode 100644 index 0000000..6cc0481 --- /dev/null +++ b/net/core/dev_ioctl.c @@ -0,0 +1,576 @@ +#include <linux/kmod.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/net_tstamp.h> +#include <linux/wireless.h> +#include <net/wext.h> + +/* + * Map an interface index to its name (SIOCGIFNAME) + */ + +/* + * We need this ioctl for efficient implementation of the + * if_indextoname() function required by the IPv6 API. Without + * it, we would have to search all the interfaces to find a + * match. --pb + */ + +static int dev_ifname(struct net *net, struct ifreq __user *arg) +{ + struct net_device *dev; + struct ifreq ifr; + unsigned seq; + + /* + * Fetch the caller's info block. 
+ */ + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + +retry: + seq = read_seqcount_begin(&devnet_rename_seq); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + + strcpy(ifr.ifr_name, dev->name); + rcu_read_unlock(); + if (read_seqcount_retry(&devnet_rename_seq, seq)) + goto retry; + + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return 0; +} + +static gifconf_func_t *gifconf_list[NPROTO]; + +/** + * register_gifconf - register a SIOCGIF handler + * @family: Address family + * @gifconf: Function handler + * + * Register protocol dependent address dumping routines. The handler + * that is passed must not be freed or reused until it has been replaced + * by another handler. + */ +int register_gifconf(unsigned int family, gifconf_func_t *gifconf) +{ + if (family >= NPROTO) + return -EINVAL; + gifconf_list[family] = gifconf; + return 0; +} +EXPORT_SYMBOL(register_gifconf); + +/* + * Perform a SIOCGIFCONF call. This structure will change + * size eventually, and there is nothing I can do about it. + * Thus we will need a 'compatibility mode'. + */ + +static int dev_ifconf(struct net *net, char __user *arg) +{ + struct ifconf ifc; + struct net_device *dev; + char __user *pos; + int len; + int total; + int i; + + /* + * Fetch the caller's info block. + */ + + if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) + return -EFAULT; + + pos = ifc.ifc_buf; + len = ifc.ifc_len; + + /* + * Loop over the interfaces, and write an info block for each. + */ + + total = 0; + for_each_netdev(net, dev) { + for (i = 0; i < NPROTO; i++) { + if (gifconf_list[i]) { + int done; + if (!pos) + done = gifconf_list[i](dev, NULL, 0); + else + done = gifconf_list[i](dev, pos + total, + len - total); + if (done < 0) + return -EFAULT; + total += done; + } + } + } + + /* + * All done. Write the updated control block back to the caller. 
+ */ + ifc.ifc_len = total; + + /* + * Both BSD and Solaris return 0 here, so we do too. + */ + return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; +} + +/* + * Perform the SIOCxIFxxx calls, inside rcu_read_lock() + */ +static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) +{ + int err; + struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); + + if (!dev) + return -ENODEV; + + switch (cmd) { + case SIOCGIFFLAGS: /* Get interface flags */ + ifr->ifr_flags = (short) dev_get_flags(dev); + return 0; + + case SIOCGIFMETRIC: /* Get the metric on the interface + (currently unused) */ + ifr->ifr_metric = 0; + return 0; + + case SIOCGIFMTU: /* Get the MTU of a device */ + ifr->ifr_mtu = dev->mtu; + return 0; + + case SIOCGIFHWADDR: + if (!dev->addr_len) + memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); + else + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + ifr->ifr_hwaddr.sa_family = dev->type; + return 0; + + case SIOCGIFSLAVE: + err = -EINVAL; + break; + + case SIOCGIFMAP: + ifr->ifr_map.mem_start = dev->mem_start; + ifr->ifr_map.mem_end = dev->mem_end; + ifr->ifr_map.base_addr = dev->base_addr; + ifr->ifr_map.irq = dev->irq; + ifr->ifr_map.dma = dev->dma; + ifr->ifr_map.port = dev->if_port; + return 0; + + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; + + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; + + default: + /* dev_ioctl() should ensure this case + * is never reached + */ + WARN_ON(1); + err = -ENOTTY; + break; + + } + return err; +} + +static int net_hwtstamp_validate(struct ifreq *ifr) +{ + struct hwtstamp_config cfg; + enum hwtstamp_tx_types tx_type; + enum hwtstamp_rx_filters rx_filter; + int tx_type_valid = 0; + int rx_filter_valid = 0; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + if (cfg.flags) /* reserved for future extensions */ + return 
-EINVAL; + + tx_type = cfg.tx_type; + rx_filter = cfg.rx_filter; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + case HWTSTAMP_TX_ONESTEP_SYNC: + tx_type_valid = 1; + break; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + rx_filter_valid = 1; + break; + } + + if (!tx_type_valid || !rx_filter_valid) + return -ERANGE; + + return 0; +} + +/* + * Perform the SIOCxIFxxx calls, inside rtnl_lock() + */ +static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) +{ + int err; + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + const struct net_device_ops *ops; + + if (!dev) + return -ENODEV; + + ops = dev->netdev_ops; + + switch (cmd) { + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); + + case SIOCSIFMETRIC: /* Set the metric on the interface + (currently unused) */ + return -EOPNOTSUPP; + + case SIOCSIFMTU: /* Set the MTU of a device */ + return dev_set_mtu(dev, ifr->ifr_mtu); + + case SIOCSIFHWADDR: + return dev_set_mac_address(dev, &ifr->ifr_hwaddr); + + case SIOCSIFHWBROADCAST: + if (ifr->ifr_hwaddr.sa_family != dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return 0; + + case SIOCSIFMAP: + if (ops->ndo_set_config) { + if (!netif_device_present(dev)) + return -ENODEV; + return 
ops->ndo_set_config(dev, &ifr->ifr_map); + } + return -EOPNOTSUPP; + + case SIOCADDMULTI: + if (!ops->ndo_set_rx_mode || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + + case SIOCDELMULTI: + if (!ops->ndo_set_rx_mode || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + + case SIOCSIFTXQLEN: + if (ifr->ifr_qlen < 0) + return -EINVAL; + dev->tx_queue_len = ifr->ifr_qlen; + return 0; + + case SIOCSIFNAME: + ifr->ifr_newname[IFNAMSIZ-1] = '\0'; + return dev_change_name(dev, ifr->ifr_newname); + + case SIOCSHWTSTAMP: + err = net_hwtstamp_validate(ifr); + if (err) + return err; + /* fall through */ + + /* + * Unknown or private ioctl + */ + default: + if ((cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) || + cmd == SIOCBONDENSLAVE || + cmd == SIOCBONDRELEASE || + cmd == SIOCBONDSETHWADDR || + cmd == SIOCBONDSLAVEINFOQUERY || + cmd == SIOCBONDINFOQUERY || + cmd == SIOCBONDCHANGEACTIVE || + cmd == SIOCGMIIPHY || + cmd == SIOCGMIIREG || + cmd == SIOCSMIIREG || + cmd == SIOCBRADDIF || + cmd == SIOCBRDELIF || + cmd == SIOCSHWTSTAMP || + cmd == SIOCWANDEV) { + err = -EOPNOTSUPP; + if (ops->ndo_do_ioctl) { + if (netif_device_present(dev)) + err = ops->ndo_do_ioctl(dev, ifr, cmd); + else + err = -ENODEV; + } + } else + err = -EINVAL; + + } + return err; +} + +/** + * dev_load - load a network module + * @net: the applicable net namespace + * @name: name of interface + * + * If a network interface is not present and the process has suitable + * privileges this function loads the module. If module loading is not + * available in this kernel then it becomes a nop. 
+ */ + +void dev_load(struct net *net, const char *name) +{ + struct net_device *dev; + int no_module; + + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); + rcu_read_unlock(); + + no_module = !dev; + if (no_module && capable(CAP_NET_ADMIN)) + no_module = request_module("netdev-%s", name); + if (no_module && capable(CAP_SYS_MODULE)) { + if (!request_module("%s", name)) + pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", + name); + } +} +EXPORT_SYMBOL(dev_load); + +/* + * This function handles all "interface"-type I/O control requests. The actual + * 'doing' part of this is dev_ifsioc above. + */ + +/** + * dev_ioctl - network device ioctl + * @net: the applicable net namespace + * @cmd: command to issue + * @arg: pointer to a struct ifreq in user space + * + * Issue ioctl functions to devices. This is normally called by the + * user space syscall interfaces but can sometimes be useful for + * other purposes. The return value is the return from the syscall if + * positive or a negative errno code on error. + */ + +int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) +{ + struct ifreq ifr; + int ret; + char *colon; + + /* One special case: SIOCGIFCONF takes ifconf argument + and requires shared lock, because it sleeps writing + to user space. + */ + + if (cmd == SIOCGIFCONF) { + rtnl_lock(); + ret = dev_ifconf(net, (char __user *) arg); + rtnl_unlock(); + return ret; + } + if (cmd == SIOCGIFNAME) + return dev_ifname(net, (struct ifreq __user *)arg); + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; + + /* + * See which interface the caller is talking about. + */ + + switch (cmd) { + /* + * These ioctl calls: + * - can be done by all. + * - atomic and do not require locking. 
+ * - return a value + */ + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFHWADDR: + case SIOCGIFSLAVE: + case SIOCGIFMAP: + case SIOCGIFINDEX: + case SIOCGIFTXQLEN: + dev_load(net, ifr.ifr_name); + rcu_read_lock(); + ret = dev_ifsioc_locked(net, &ifr, cmd); + rcu_read_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; + + case SIOCETHTOOL: + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ethtool(net, &ifr); + rtnl_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - return a value + */ + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSIFNAME: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - do not return a value + */ + case SIOCSIFMAP: + case SIOCSIFTXQLEN: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + /* + * These ioctl calls: + * - require local superuser power. + * - require strict serialization. 
+ * - do not return a value + */ + case SIOCSIFFLAGS: + case SIOCSIFMETRIC: + case SIOCSIFMTU: + case SIOCSIFHWADDR: + case SIOCSIFSLAVE: + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCSIFHWBROADCAST: + case SIOCSMIIREG: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + case SIOCBRADDIF: + case SIOCBRDELIF: + case SIOCSHWTSTAMP: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + return ret; + + case SIOCGIFMEM: + /* Get the per device memory space. We can add this but + * currently do not support it */ + case SIOCSIFMEM: + /* Set the per device memory buffer space. + * Not applicable in our case */ + case SIOCSIFLINK: + return -ENOTTY; + + /* + * Unknown or private ioctl. + */ + default: + if (cmd == SIOCWANDEV || + (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15)) { + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + if (!ret && copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + return ret; + } + /* Take care of Wireless Extensions */ + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) + return wext_handle_ioctl(net, &ifr, cmd, arg); + return -ENOTTY; + } +} diff --git a/net/core/dst.c b/net/core/dst.c index ee6153e..35fd12f 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -179,6 +179,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; + dst->from = NULL; #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a870543..3e9b2c3 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] 
[NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", + [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", @@ -175,7 +176,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_FEATURES) return ARRAY_SIZE(netdev_features_strings); - if (ops && ops->get_sset_count && ops->get_strings) + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else return -EOPNOTSUPP; @@ -311,7 +312,7 @@ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { ASSERT_RTNL(); - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) + if (!dev->ethtool_ops->get_settings) return -EOPNOTSUPP; memset(cmd, 0, sizeof(struct ethtool_cmd)); @@ -355,7 +356,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, memset(&info, 0, sizeof(info)); info.cmd = ETHTOOL_GDRVINFO; - if (ops && ops->get_drvinfo) { + if (ops->get_drvinfo) { ops->get_drvinfo(dev, &info); } else if (dev->dev.parent && dev->dev.parent->driver) { strlcpy(info.bus_info, dev_name(dev->dev.parent), @@ -370,7 +371,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, * this method of obtaining string set info is deprecated; * Use ETHTOOL_GSSET_INFO instead. 
*/ - if (ops && ops->get_sset_count) { + if (ops->get_sset_count) { int rc; rc = ops->get_sset_count(dev, ETH_SS_TEST); @@ -383,9 +384,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, if (rc >= 0) info.n_priv_flags = rc; } - if (ops && ops->get_regs_len) + if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); - if (ops && ops->get_eeprom_len) + if (ops->get_eeprom_len) info.eedump_len = ops->get_eeprom_len(dev); if (copy_to_user(useraddr, &info, sizeof(info))) @@ -590,13 +591,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, struct ethtool_rxnfc rx_rings; u32 user_size, dev_size, i; u32 *indir; + const struct ethtool_ops *ops = dev->ethtool_ops; int ret; - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->set_rxfh_indir || - !dev->ethtool_ops->get_rxnfc) + if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || + !ops->get_rxnfc) return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + + dev_size = ops->get_rxfh_indir_size(dev); if (dev_size == 0) return -EOPNOTSUPP; @@ -613,7 +615,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, return -ENOMEM; rx_rings.cmd = ETHTOOL_GRXRINGS; - ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL); + ret = ops->get_rxnfc(dev, &rx_rings, NULL); if (ret) goto out; @@ -639,7 +641,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, } } - ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh_indir(dev, indir); out: kfree(indir); @@ -1082,9 +1084,10 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; static bool busy; + const struct ethtool_ops *ops = dev->ethtool_ops; int rc; - if (!dev->ethtool_ops->set_phys_id) + if (!ops->set_phys_id) return -EOPNOTSUPP; if (busy) @@ -1093,7 +1096,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) if (copy_from_user(&id, 
useraddr, sizeof(id))) return -EFAULT; - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); + rc = ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); if (rc < 0) return rc; @@ -1118,7 +1121,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) i = n; do { rtnl_lock(); - rc = dev->ethtool_ops->set_phys_id(dev, + rc = ops->set_phys_id(dev, (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); rtnl_unlock(); if (rc) @@ -1133,7 +1136,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) dev_put(dev); busy = false; - (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); + (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); return rc; } @@ -1275,7 +1278,7 @@ static int ethtool_get_dump_flag(struct net_device *dev, struct ethtool_dump dump; const struct ethtool_ops *ops = dev->ethtool_ops; - if (!dev->ethtool_ops->get_dump_flag) + if (!ops->get_dump_flag) return -EOPNOTSUPP; if (copy_from_user(&dump, useraddr, sizeof(dump))) @@ -1299,8 +1302,7 @@ static int ethtool_get_dump_data(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; void *data = NULL; - if (!dev->ethtool_ops->get_dump_data || - !dev->ethtool_ops->get_dump_flag) + if (!ops->get_dump_data || !ops->get_dump_flag) return -EOPNOTSUPP; if (copy_from_user(&dump, useraddr, sizeof(dump))) @@ -1346,13 +1348,9 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) info.cmd = ETHTOOL_GET_TS_INFO; if (phydev && phydev->drv && phydev->drv->ts_info) { - err = phydev->drv->ts_info(phydev, &info); - - } else if (dev->ethtool_ops && dev->ethtool_ops->get_ts_info) { - + } else if (ops->get_ts_info) { err = ops->get_ts_info(dev, &info); - } else { info.so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | diff --git a/net/core/filter.c b/net/core/filter.c index c23543c..2e20b55 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -532,6 +532,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) [BPF_JMP|BPF_JSET|BPF_X] 
= BPF_S_JMP_JSET_X, }; int pc; + bool anc_found; if (flen == 0 || flen > BPF_MAXINSNS) return -EINVAL; @@ -592,8 +593,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) case BPF_S_LD_W_ABS: case BPF_S_LD_H_ABS: case BPF_S_LD_B_ABS: + anc_found = false; #define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ code = BPF_S_ANC_##CODE; \ + anc_found = true; \ break switch (ftest->k) { ANCILLARY(PROTOCOL); @@ -610,6 +613,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); } + + /* ancillary operation unknown or unsupported */ + if (anc_found == false && ftest->k >= SKF_AD_OFF) + return -EINVAL; } ftest->code = code; } @@ -714,6 +721,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) unsigned int fsize = sizeof(struct sock_filter) * fprog->len; int err; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) return -EINVAL; @@ -750,6 +760,9 @@ int sk_detach_filter(struct sock *sk) int ret = -ENOENT; struct sk_filter *filter; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + filter = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); if (filter) { diff --git a/net/core/flow.c b/net/core/flow.c index b0901ee..2bfd081 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -132,14 +132,14 @@ static void __flow_cache_shrink(struct flow_cache *fc, int shrink_to) { struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; for (i = 0; i < flow_cache_hash_size(fc); i++) { int saved = 0; - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (saved < shrink_to && flow_entry_valid(fle)) { @@ -211,7 +211,6 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache *fc = 
&flow_cache_global; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; - struct hlist_node *entry; struct flow_cache_object *flo; size_t keysize; unsigned int hash; @@ -235,7 +234,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_new_hash_rnd(fc, fcp); hash = flow_hash_code(fc, fcp, key, keysize); - hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { + hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { if (tfle->net == net && tfle->family == family && tfle->dir == dir && @@ -286,7 +285,7 @@ nocache: else fle->genid--; } else { - if (flo && !IS_ERR(flo)) + if (!IS_ERR_OR_NULL(flo)) flo->ops->delete(flo); } ret_object: @@ -301,13 +300,13 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_cache *fc = info->cache; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (flow_entry_valid(fle)) continue; @@ -329,7 +328,7 @@ static void flow_cache_flush_per_cpu(void *data) struct flow_flush_info *info = data; struct tasklet_struct *tasklet; - tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet); + tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 466820b..e187bf0 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -140,6 +140,181 @@ ipv6: flow->ports = *ports; } + flow->thoff = (u16) nhoff; + return true; } EXPORT_SYMBOL(skb_flow_dissect); + +static u32 hashrnd __read_mostly; + +/* + * __skb_get_rxhash: calculate a flow hash based on src/dst addresses + * and src/dst port numbers. 
Sets rxhash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * if hash is a canonical 4-tuple hash over transport ports. + */ +void __skb_get_rxhash(struct sk_buff *skb) +{ + struct flow_keys keys; + u32 hash; + + if (!skb_flow_dissect(skb, &keys)) + return; + + if (keys.ports) + skb->l4_rxhash = 1; + + /* get a consistent hash (same value on both flow directions) */ + if (((__force u32)keys.dst < (__force u32)keys.src) || + (((__force u32)keys.dst == (__force u32)keys.src) && + ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { + swap(keys.dst, keys.src); + swap(keys.port16[0], keys.port16[1]); + } + + hash = jhash_3words((__force u32)keys.dst, + (__force u32)keys.src, + (__force u32)keys.ports, hashrnd); + if (!hash) + hash = 1; + + skb->rxhash = hash; +} +EXPORT_SYMBOL(__skb_get_rxhash); + +/* + * Returns a Tx hash based on the given packet descriptor and a Tx queues' + * number to be used as a distribution range. + */ +u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, + unsigned int num_tx_queues) +{ + u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; + + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + while (unlikely(hash >= num_tx_queues)) + hash -= num_tx_queues; + return hash; + } + + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol; + hash = jhash_1word(hash, hashrnd); + + return (u16) (((u64) hash * qcount) >> 32) + qoffset; +} +EXPORT_SYMBOL(__skb_tx_hash); + +static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) +{ + if (unlikely(queue_index >= dev->real_num_tx_queues)) { + net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", + dev->name, queue_index, + 
dev->real_num_tx_queues); + return 0; + } + return queue_index; +} + +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[raw_smp_processor_id()]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else { + u32 hash; + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol ^ + skb->rxhash; + hash = jhash_1word(hash, hashrnd); + queue_index = map->queues[ + ((u64)hash * map->len) >> 32]; + } + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + +u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + int queue_index = sk_tx_queue_get(sk); + + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int new_index = get_xps_queue(dev, skb); + if (new_index < 0) + new_index = skb_tx_hash(dev, skb); + + if (queue_index != new_index && sk) { + struct dst_entry *dst = + rcu_dereference_check(sk->sk_dst_cache, 1); + + if (dst && skb_dst(skb) == dst) + sk_tx_queue_set(sk, new_index); + + } + + queue_index = new_index; + } + + return queue_index; +} +EXPORT_SYMBOL(__netdev_pick_tx); + +struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb) +{ + int queue_index = 0; + + if (dev->real_num_tx_queues != 1) { + const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb); + else + queue_index = __netdev_pick_tx(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); + } + + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); +} + +static int 
__init initialize_hashrnd(void) +{ + get_random_bytes(&hashrnd, sizeof(hashrnd)); + return 0; +} + +late_initcall_sync(initialize_hashrnd); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c815f28..3863b8f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -290,15 +290,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device goto out_entries; } - if (tbl->entry_size) - n = kzalloc(tbl->entry_size, GFP_ATOMIC); - else { - int sz = sizeof(*n) + tbl->key_len; - - sz = ALIGN(sz, NEIGH_PRIV_ALIGN); - sz += dev->neigh_priv_len; - n = kzalloc(sz, GFP_ATOMIC); - } + n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); if (!n) goto out_entries; @@ -778,6 +770,9 @@ static void neigh_periodic_work(struct work_struct *work) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); + if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + goto out; + /* * periodically recompute ReachableTime from random function */ @@ -832,6 +827,7 @@ next_elt: nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); } +out: /* Cycle through all hash buckets every base_reachable_time/2 ticks. * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 * base_reachable_time. 
@@ -1542,6 +1538,12 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) if (!tbl->nht || !tbl->phash_buckets) panic("cannot allocate neighbour cache hashes"); + if (!tbl->entry_size) + tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) + + tbl->key_len, NEIGH_PRIV_ALIGN); + else + WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); + rwlock_init(&tbl->lock); INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c new file mode 100644 index 0000000..3174f19 --- /dev/null +++ b/net/core/net-procfs.c @@ -0,0 +1,411 @@ +#include <linux/netdevice.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <net/wext.h> + +#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +extern struct list_head ptype_all __read_mostly; +extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_head *h; + unsigned int count = 0, offset = get_offset(*pos); + + h = &net->dev_name_head[get_bucket(*pos)]; + hlist_for_each_entry_rcu(dev, h, name_hlist) { + if (++count == offset) + return dev; + } + + return NULL; +} + +static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net_device *dev; + unsigned int bucket; + + do { + dev = dev_from_same_bucket(seq, pos); + if (dev) + return dev; + + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +} + +/* + * This is invoked by the /proc filesystem handler to display a device + * in detail. 
+ */ +static void *dev_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= NETDEV_HASHENTRIES) + return NULL; + + return dev_from_bucket(seq, pos); +} + +static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return dev_from_bucket(seq, pos); +} + +static void dev_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) +{ + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", + dev->name, stats->rx_bytes, stats->rx_packets, + stats->rx_errors, + stats->rx_dropped + stats->rx_missed_errors, + stats->rx_fifo_errors, + stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors, + stats->rx_compressed, stats->multicast, + stats->tx_bytes, stats->tx_packets, + stats->tx_errors, stats->tx_dropped, + stats->tx_fifo_errors, stats->collisions, + stats->tx_carrier_errors + + stats->tx_aborted_errors + + stats->tx_window_errors + + stats->tx_heartbeat_errors, + stats->tx_compressed); +} + +/* + * Called from the PROCfs module. 
This now uses the new arbitrary sized + * /proc/net interface to create /proc/net/dev + */ +static int dev_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Inter-| Receive " + " | Transmit\n" + " face |bytes packets errs drop fifo frame " + "compressed multicast|bytes packets errs " + "drop fifo colls carrier compressed\n"); + else + dev_seq_printf_stats(seq, v); + return 0; +} + +static struct softnet_data *softnet_get_online(loff_t *pos) +{ + struct softnet_data *sd = NULL; + + while (*pos < nr_cpu_ids) + if (cpu_online(*pos)) { + sd = &per_cpu(softnet_data, *pos); + break; + } else + ++*pos; + return sd; +} + +static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) +{ + return softnet_get_online(pos); +} + +static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return softnet_get_online(pos); +} + +static void softnet_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int softnet_seq_show(struct seq_file *seq, void *v) +{ + struct softnet_data *sd = v; + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + sd->processed, sd->dropped, sd->time_squeeze, 0, + 0, 0, 0, 0, /* was fastroute */ + sd->cpu_collision, sd->received_rps); + return 0; +} + +static const struct seq_operations dev_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_seq_show, +}; + +static int dev_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_seq_fops = { + .owner = THIS_MODULE, + .open = dev_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static const struct seq_operations softnet_seq_ops = { + .start = softnet_seq_start, + .next = softnet_seq_next, + .stop = softnet_seq_stop, + .show = softnet_seq_show, +}; + +static int softnet_seq_open(struct inode *inode, 
struct file *file) +{ + return seq_open(file, &softnet_seq_ops); +} + +static const struct file_operations softnet_seq_fops = { + .owner = THIS_MODULE, + .open = softnet_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *ptype_get_idx(loff_t pos) +{ + struct packet_type *pt = NULL; + loff_t i = 0; + int t; + + list_for_each_entry_rcu(pt, &ptype_all, list) { + if (i == pos) + return pt; + ++i; + } + + for (t = 0; t < PTYPE_HASH_SIZE; t++) { + list_for_each_entry_rcu(pt, &ptype_base[t], list) { + if (i == pos) + return pt; + ++i; + } + } + return NULL; +} + +static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; +} + +static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct packet_type *pt; + struct list_head *nxt; + int hash; + + ++*pos; + if (v == SEQ_START_TOKEN) + return ptype_get_idx(0); + + pt = v; + nxt = pt->list.next; + if (pt->type == htons(ETH_P_ALL)) { + if (nxt != &ptype_all) + goto found; + hash = 0; + nxt = ptype_base[0].next; + } else + hash = ntohs(pt->type) & PTYPE_HASH_MASK; + + while (nxt == &ptype_base[hash]) { + if (++hash >= PTYPE_HASH_SIZE) + return NULL; + nxt = ptype_base[hash].next; + } +found: + return list_entry(nxt, struct packet_type, list); +} + +static void ptype_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static int ptype_seq_show(struct seq_file *seq, void *v) +{ + struct packet_type *pt = v; + + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Type Device Function\n"); + else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + if (pt->type == htons(ETH_P_ALL)) + seq_puts(seq, "ALL "); + else + seq_printf(seq, "%04x", ntohs(pt->type)); + + seq_printf(seq, " %-8s %pF\n", + pt->dev ? 
pt->dev->name : "", pt->func); + } + + return 0; +} + +static const struct seq_operations ptype_seq_ops = { + .start = ptype_seq_start, + .next = ptype_seq_next, + .stop = ptype_seq_stop, + .show = ptype_seq_show, +}; + +static int ptype_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ptype_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations ptype_seq_fops = { + .owner = THIS_MODULE, + .open = ptype_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + + +static int __net_init dev_proc_net_init(struct net *net) +{ + int rc = -ENOMEM; + + if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops)) + goto out; + if (!proc_create("softnet_stat", S_IRUGO, net->proc_net, + &softnet_seq_fops)) + goto out_dev; + if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) + goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; + rc = 0; +out: + return rc; +out_ptype: + remove_proc_entry("ptype", net->proc_net); +out_softnet: + remove_proc_entry("softnet_stat", net->proc_net); +out_dev: + remove_proc_entry("dev", net->proc_net); + goto out; +} + +static void __net_exit dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + remove_proc_entry("ptype", net->proc_net); + remove_proc_entry("softnet_stat", net->proc_net); + remove_proc_entry("dev", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +static int dev_mc_seq_show(struct seq_file *seq, void *v) +{ + struct netdev_hw_addr *ha; + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + return 0; + + netif_addr_lock_bh(dev); + netdev_for_each_mc_addr(ha, dev) { + int i; + + seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, + dev->name, ha->refcount, ha->global_use); + + for (i = 0; i < dev->addr_len; i++) + seq_printf(seq, "%02x", ha->addr[i]); + + seq_putc(seq, '\n'); + } + 
netif_addr_unlock_bh(dev); + return 0; +} + +static const struct seq_operations dev_mc_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_mc_seq_show, +}; + +static int dev_mc_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_mc_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_mc_seq_fops = { + .owner = THIS_MODULE, + .open = dev_mc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static int __net_init dev_mc_net_init(struct net *net) +{ + if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit dev_mc_net_exit(struct net *net) +{ + remove_proc_entry("dev_mcast", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + +int __init dev_proc_init(void) +{ + int ret = register_pernet_subsys(&dev_proc_ops); + if (!ret) + return register_pernet_subsys(&dev_mc_net_ops); + return ret; +} diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 334efd5..7427ab5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -21,6 +21,7 @@ #include <linux/vmalloc.h> #include <linux/export.h> #include <linux/jiffies.h> +#include <linux/pm_runtime.h> #include "net-sysfs.h" @@ -126,6 +127,19 @@ static ssize_t show_broadcast(struct device *dev, return -EINVAL; } +static int change_carrier(struct net_device *net, unsigned long new_carrier) +{ + if (!netif_running(net)) + return -EINVAL; + return dev_change_carrier(net, (bool) new_carrier); +} + +static ssize_t store_carrier(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_carrier); +} + static ssize_t show_carrier(struct device *dev, struct device_attribute *attr, char *buf) { @@ -331,7 +345,7 @@ static struct 
device_attribute net_class_attributes[] = { __ATTR(link_mode, S_IRUGO, show_link_mode, NULL), __ATTR(address, S_IRUGO, show_address, NULL), __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), - __ATTR(carrier, S_IRUGO, show_carrier, NULL), + __ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier), __ATTR(speed, S_IRUGO, show_speed, NULL), __ATTR(duplex, S_IRUGO, show_duplex, NULL), __ATTR(dormant, S_IRUGO, show_dormant, NULL), @@ -989,68 +1003,14 @@ static ssize_t show_xps_map(struct netdev_queue *queue, return len; } -static DEFINE_MUTEX(xps_map_mutex); -#define xmap_dereference(P) \ - rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) - -static void xps_queue_release(struct netdev_queue *queue) -{ - struct net_device *dev = queue->dev; - struct xps_dev_maps *dev_maps; - struct xps_map *map; - unsigned long index; - int i, pos, nonempty = 0; - - index = get_netdev_queue_index(queue); - - mutex_lock(&xps_map_mutex); - dev_maps = xmap_dereference(dev->xps_maps); - - if (dev_maps) { - for_each_possible_cpu(i) { - map = xmap_dereference(dev_maps->cpu_map[i]); - if (!map) - continue; - - for (pos = 0; pos < map->len; pos++) - if (map->queues[pos] == index) - break; - - if (pos < map->len) { - if (map->len > 1) - map->queues[pos] = - map->queues[--map->len]; - else { - RCU_INIT_POINTER(dev_maps->cpu_map[i], - NULL); - kfree_rcu(map, rcu); - map = NULL; - } - } - if (map) - nonempty = 1; - } - - if (!nonempty) { - RCU_INIT_POINTER(dev->xps_maps, NULL); - kfree_rcu(dev_maps, rcu); - } - } - mutex_unlock(&xps_map_mutex); -} - static ssize_t store_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, const char *buf, size_t len) { struct net_device *dev = queue->dev; - cpumask_var_t mask; - int err, i, cpu, pos, map_len, alloc_len, need_set; unsigned long index; - struct xps_map *map, *new_map; - struct xps_dev_maps *dev_maps, *new_dev_maps; - int nonempty = 0; - int numa_node_id = -2; + cpumask_var_t mask; + int err; if 
(!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1066,105 +1026,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue, return err; } - new_dev_maps = kzalloc(max_t(unsigned int, - XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); - if (!new_dev_maps) { - free_cpumask_var(mask); - return -ENOMEM; - } - - mutex_lock(&xps_map_mutex); - - dev_maps = xmap_dereference(dev->xps_maps); - - for_each_possible_cpu(cpu) { - map = dev_maps ? - xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; - new_map = map; - if (map) { - for (pos = 0; pos < map->len; pos++) - if (map->queues[pos] == index) - break; - map_len = map->len; - alloc_len = map->alloc_len; - } else - pos = map_len = alloc_len = 0; - - need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); -#ifdef CONFIG_NUMA - if (need_set) { - if (numa_node_id == -2) - numa_node_id = cpu_to_node(cpu); - else if (numa_node_id != cpu_to_node(cpu)) - numa_node_id = -1; - } -#endif - if (need_set && pos >= map_len) { - /* Need to add queue to this CPU's map */ - if (map_len >= alloc_len) { - alloc_len = alloc_len ? - 2 * alloc_len : XPS_MIN_MAP_ALLOC; - new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), - GFP_KERNEL, - cpu_to_node(cpu)); - if (!new_map) - goto error; - new_map->alloc_len = alloc_len; - for (i = 0; i < map_len; i++) - new_map->queues[i] = map->queues[i]; - new_map->len = map_len; - } - new_map->queues[new_map->len++] = index; - } else if (!need_set && pos < map_len) { - /* Need to remove queue from this CPU's map */ - if (map_len > 1) - new_map->queues[pos] = - new_map->queues[--new_map->len]; - else - new_map = NULL; - } - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); - } - - /* Cleanup old maps */ - for_each_possible_cpu(cpu) { - map = dev_maps ? 
- xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; - if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) - kfree_rcu(map, rcu); - if (new_dev_maps->cpu_map[cpu]) - nonempty = 1; - } - - if (nonempty) { - rcu_assign_pointer(dev->xps_maps, new_dev_maps); - } else { - kfree(new_dev_maps); - RCU_INIT_POINTER(dev->xps_maps, NULL); - } - - if (dev_maps) - kfree_rcu(dev_maps, rcu); - - netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id : - NUMA_NO_NODE); - - mutex_unlock(&xps_map_mutex); + err = netif_set_xps_queue(dev, mask, index); free_cpumask_var(mask); - return len; -error: - mutex_unlock(&xps_map_mutex); - - if (new_dev_maps) - for_each_possible_cpu(i) - kfree(rcu_dereference_protected( - new_dev_maps->cpu_map[i], - 1)); - kfree(new_dev_maps); - free_cpumask_var(mask); - return -ENOMEM; + return err ? : len; } static struct netdev_queue_attribute xps_cpus_attribute = @@ -1183,10 +1049,6 @@ static void netdev_queue_release(struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); -#ifdef CONFIG_XPS - xps_queue_release(queue); -#endif - memset(kobj, 0, sizeof(*kobj)); dev_put(queue->dev); } @@ -1334,7 +1196,6 @@ struct kobj_ns_type_operations net_ns_type_operations = { }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -#ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { struct net_device *dev = to_net_dev(d); @@ -1353,7 +1214,6 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) exit: return retval; } -#endif /* * netdev_release -- destroy and free a dead device. 
@@ -1382,9 +1242,7 @@ static struct class net_class = { #ifdef CONFIG_SYSFS .dev_attrs = net_class_attributes, #endif /* CONFIG_SYSFS */ -#ifdef CONFIG_HOTPLUG .dev_uevent = netdev_uevent, -#endif .ns_type = &net_ns_type_operations, .namespace = net_namespace, }; @@ -1400,6 +1258,8 @@ void netdev_unregister_kobject(struct net_device * net) remove_queue_kobjects(net); + pm_runtime_set_memalloc_noio(dev, false); + device_del(dev); } @@ -1444,6 +1304,8 @@ int netdev_register_kobject(struct net_device *net) return error; } + pm_runtime_set_memalloc_noio(dev, true); + return error; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 8acce01..80e271d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -344,7 +344,7 @@ struct net *get_net_ns_by_fd(int fd) if (IS_ERR(file)) return ERR_CAST(file); - ei = PROC_I(file->f_dentry->d_inode); + ei = PROC_I(file_inode(file)); if (ei->ns_ops == &netns_operations) net = get_net(ei->ns); else diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3151acf..fa32899 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -29,6 +29,9 @@ #include <linux/if_vlan.h> #include <net/tcp.h> #include <net/udp.h> +#include <net/addrconf.h> +#include <net/ndisc.h> +#include <net/ip6_checksum.h> #include <asm/unaligned.h> #include <trace/events/napi.h> @@ -44,6 +47,8 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; +static struct srcu_struct netpoll_srcu; + #define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -55,7 +60,8 @@ static atomic_t trapped; MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -181,13 +187,13 @@ static void 
poll_napi(struct net_device *dev) } } -static void service_arp_queue(struct netpoll_info *npi) +static void service_neigh_queue(struct netpoll_info *npi) { if (npi) { struct sk_buff *skb; - while ((skb = skb_dequeue(&npi->arp_tx))) - netpoll_arp_reply(skb, npi); + while ((skb = skb_dequeue(&npi->neigh_tx))) + netpoll_neigh_reply(skb, npi); } } @@ -196,35 +202,76 @@ static void netpoll_poll_dev(struct net_device *dev) const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - if (!dev || !netif_running(dev)) + /* Don't do any rx activity if the dev_lock mutex is held + * the dev_open/close paths use this to block netpoll activity + * while changing device state + */ + if (!mutex_trylock(&ni->dev_lock)) return; + if (!netif_running(dev)) { + mutex_unlock(&ni->dev_lock); + return; + } + ops = dev->netdev_ops; - if (!ops->ndo_poll_controller) + if (!ops->ndo_poll_controller) { + mutex_unlock(&ni->dev_lock); return; + } /* Process pending work on NIC */ ops->ndo_poll_controller(dev); poll_napi(dev); + mutex_unlock(&ni->dev_lock); + if (dev->flags & IFF_SLAVE) { if (ni) { - struct net_device *bond_dev = dev->master; + struct net_device *bond_dev; struct sk_buff *skb; - struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&ni->arp_tx))) { + struct netpoll_info *bond_ni; + + bond_dev = netdev_master_upper_dev_get_rcu(dev); + bond_ni = rcu_dereference_bh(bond_dev->npinfo); + while ((skb = skb_dequeue(&ni->neigh_tx))) { skb->dev = bond_dev; - skb_queue_tail(&bond_ni->arp_tx, skb); + skb_queue_tail(&bond_ni->neigh_tx, skb); } } } - service_arp_queue(ni); + service_neigh_queue(ni); zap_completion_queue(); } +int netpoll_rx_disable(struct net_device *dev) +{ + struct netpoll_info *ni; + int idx; + might_sleep(); + idx = srcu_read_lock(&netpoll_srcu); + ni = srcu_dereference(dev->npinfo, &netpoll_srcu); + if (ni) + mutex_lock(&ni->dev_lock); + srcu_read_unlock(&netpoll_srcu, idx); + return 0; +} 
+EXPORT_SYMBOL(netpoll_rx_disable); + +void netpoll_rx_enable(struct net_device *dev) +{ + struct netpoll_info *ni; + rcu_read_lock(); + ni = rcu_dereference(dev->npinfo); + if (ni) + mutex_unlock(&ni->dev_lock); + rcu_read_unlock(); +} +EXPORT_SYMBOL(netpoll_rx_enable); + static void refill_skbs(void) { struct sk_buff *skb; @@ -381,9 +428,14 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) struct iphdr *iph; struct ethhdr *eth; static atomic_t ip_ident; + struct ipv6hdr *ip6h; udp_len = len + sizeof(*udph); - ip_len = udp_len + sizeof(*iph); + if (np->ipv6) + ip_len = udp_len + sizeof(*ip6h); + else + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); skb = find_skb(np, total_len + np->dev->needed_tailroom, @@ -400,34 +452,66 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->source = htons(np->local_port); udph->dest = htons(np->remote_port); udph->len = htons(udp_len); - udph->check = 0; - udph->check = csum_tcpudp_magic(np->local_ip, - np->remote_ip, - udp_len, IPPROTO_UDP, - csum_partial(udph, udp_len, 0)); - if (udph->check == 0) - udph->check = CSUM_MANGLED_0; - - skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - iph = ip_hdr(skb); - - /* iph->version = 4; iph->ihl = 5; */ - put_unaligned(0x45, (unsigned char *)iph); - iph->tos = 0; - put_unaligned(htons(ip_len), &(iph->tot_len)); - iph->id = htons(atomic_inc_return(&ip_ident)); - iph->frag_off = 0; - iph->ttl = 64; - iph->protocol = IPPROTO_UDP; - iph->check = 0; - put_unaligned(np->local_ip, &(iph->saddr)); - put_unaligned(np->remote_ip, &(iph->daddr)); - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - - eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb->protocol = eth->h_proto = htons(ETH_P_IP); + + if (np->ipv6) { + udph->check = 0; + udph->check = csum_ipv6_magic(&np->local_ip.in6, + &np->remote_ip.in6, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + 
if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + + skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + + /* ip6h->version = 6; ip6h->priority = 0; */ + put_unaligned(0x60, (unsigned char *)ip6h); + ip6h->flow_lbl[0] = 0; + ip6h->flow_lbl[1] = 0; + ip6h->flow_lbl[2] = 0; + + ip6h->payload_len = htons(sizeof(struct udphdr) + len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 32; + ip6h->saddr = np->local_ip.in6; + ip6h->daddr = np->remote_ip.in6; + + eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth->h_proto = htons(ETH_P_IPV6); + } else { + udph->check = 0; + udph->check = csum_tcpudp_magic(np->local_ip.ip, + np->remote_ip.ip, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + + /* iph->version = 4; iph->ihl = 5; */ + put_unaligned(0x45, (unsigned char *)iph); + iph->tos = 0; + put_unaligned(htons(ip_len), &(iph->tot_len)); + iph->id = htons(atomic_inc_return(&ip_ident)); + iph->frag_off = 0; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->check = 0; + put_unaligned(np->local_ip.ip, &(iph->saddr)); + put_unaligned(np->remote_ip.ip, &(iph->daddr)); + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth->h_proto = htons(ETH_P_IP); + } + memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); @@ -437,18 +521,16 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { - struct arphdr *arp; - unsigned char *arp_ptr; - int size, type = ARPOP_REPLY, ptype = 
ETH_P_ARP; + int size, type = ARPOP_REPLY; __be32 sip, tip; unsigned char *sha; struct sk_buff *send_skb; struct netpoll *np, *tmp; unsigned long flags; int hlen, tlen; - int hits = 0; + int hits = 0, proto; if (list_empty(&npinfo->rx_np)) return; @@ -466,94 +548,214 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) if (!hits) return; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; + proto = ntohs(eth_hdr(skb)->h_proto); + if (proto == ETH_P_IP) { + struct arphdr *arp; + unsigned char *arp_ptr; + /* No arp on this interface */ + if (skb->dev->flags & IFF_NOARP) + return; - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) + return; - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + arp = arp_hdr(skb); - arp_ptr = (unsigned char *)(arp+1); - /* save the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? 
*/ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; + if ((arp->ar_hrd != htons(ARPHRD_ETHER) && + arp->ar_hrd != htons(ARPHRD_IEEE802)) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_op != htons(ARPOP_REQUEST)) + return; - size = arp_hdr_len(skb->dev); + arp_ptr = (unsigned char *)(arp+1); + /* save the location of the src hw addr */ + sha = arp_ptr; + arp_ptr += skb->dev->addr_len; + memcpy(&sip, arp_ptr, 4); + arp_ptr += 4; + /* If we actually cared about dst hw addr, + it would get copied here */ + arp_ptr += skb->dev->addr_len; + memcpy(&tip, arp_ptr, 4); - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip) - continue; + /* Should we ignore arp? */ + if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) + return; - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; + size = arp_hdr_len(skb->dev); - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (tip != np->local_ip.ip) + continue; + + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); + if (!send_skb) + continue; + + skb_reset_network_header(send_skb); + arp = (struct arphdr *) skb_put(send_skb, size); + send_skb->dev = skb->dev; + send_skb->protocol = htons(ETH_P_ARP); + + /* Fill the device header for the ARP frame */ + if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP, + sha, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ptype, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - 
continue; + /* + * Fill out the arp protocol part. + * + * we only support ethernet device type, + * which (according to RFC 1390) should + * always equal 1 (Ethernet). + */ + + arp->ar_hrd = htons(np->dev->type); + arp->ar_pro = htons(ETH_P_IP); + arp->ar_hln = np->dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr = (unsigned char *)(arp + 1); + memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &tip, 4); + arp_ptr += 4; + memcpy(arp_ptr, sha, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &sip, 4); + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } else if( proto == ETH_P_IPV6) { +#if IS_ENABLED(CONFIG_IPV6) + struct nd_msg *msg; + u8 *lladdr = NULL; + struct ipv6hdr *hdr; + struct icmp6hdr *icmp6h; + const struct in6_addr *saddr; + const struct in6_addr *daddr; + struct inet6_dev *in6_dev = NULL; + struct in6_addr *target; + + in6_dev = in6_dev_get(skb->dev); + if (!in6_dev || !in6_dev->cnf.accept_ra) + return; - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). 
- */ + if (!pskb_may_pull(skb, skb->len)) + return; - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); + msg = (struct nd_msg *)skb_transport_header(skb); - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); + __skb_push(skb, skb->data - skb_transport_header(skb)); - netpoll_send_skb(np, send_skb); + if (ipv6_hdr(skb)->hop_limit != 255) + return; + if (msg->icmph.icmp6_code != 0) + return; + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return; + + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ - break; + size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); + + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) + continue; + + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); + if (!send_skb) + continue; + + send_skb->protocol = htons(ETH_P_IPV6); + send_skb->dev = skb->dev; + + skb_reset_network_header(send_skb); + skb_put(send_skb, sizeof(struct ipv6hdr)); + hdr = ipv6_hdr(send_skb); + + *(__be32*)hdr = htonl(0x60000000); + + hdr->payload_len = htons(size); + hdr->nexthdr = IPPROTO_ICMPV6; + hdr->hop_limit = 255; + hdr->saddr = *saddr; + hdr->daddr = *daddr; + + send_skb->transport_header = send_skb->tail; + skb_put(send_skb, size); + + icmp6h = (struct icmp6hdr *)skb_transport_header(skb); + icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; + icmp6h->icmp6_router = 0; + icmp6h->icmp6_solicited = 1; + target = (struct in6_addr 
*)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); + *target = msg->target; + icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, + IPPROTO_ICMPV6, + csum_partial(icmp6h, + size, 0)); + + if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6, + lladdr, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); +#endif } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); +} + +static bool pkt_is_ns(struct sk_buff *skb) +{ + struct nd_msg *msg; + struct ipv6hdr *hdr; + + if (skb->protocol != htons(ETH_P_ARP)) + return false; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) + return false; + + msg = (struct nd_msg *)skb_transport_header(skb); + __skb_push(skb, skb->data - skb_transport_header(skb)); + hdr = ipv6_hdr(skb); + + if (hdr->nexthdr != IPPROTO_ICMPV6) + return false; + if (hdr->hop_limit != 255) + return false; + if (msg->icmph.icmp6_code != 0) + return false; + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return false; + + return true; } int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) @@ -571,9 +773,11 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) goto out; /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && - atomic_read(&trapped)) { - skb_queue_tail(&npinfo->arp_tx, skb); + if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { + skb_queue_tail(&npinfo->neigh_tx, skb); + return 1; + } else if (pkt_is_ns(skb) && atomic_read(&trapped)) { + skb_queue_tail(&npinfo->neigh_tx, skb); return 1; } @@ -584,60 +788,100 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) } proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP) + if (proto != ETH_P_IP && proto != ETH_P_IPV6) goto out; 
if (skb->pkt_type == PACKET_OTHERHOST) goto out; if (skb_shared(skb)) goto out; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; + if (proto == ETH_P_IP) { + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto out; + iph = (struct iphdr *)skb->data; + if (iph->ihl < 5 || iph->version != 4) + goto out; + if (!pskb_may_pull(skb, iph->ihl*4)) + goto out; + iph = (struct iphdr *)skb->data; + if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) + goto out; - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. - */ - if (pskb_trim_rcsum(skb, len)) - goto out; + len = ntohs(iph->tot_len); + if (skb->len < len || len < iph->ihl*4) + goto out; - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; + /* + * Our transport medium may have padded the buffer out. + * Now We trim to the true length of the frame. 
+ */ + if (pskb_trim_rcsum(skb, len)) + goto out; - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - ulen = ntohs(uh->len); + iph = (struct iphdr *)skb->data; + if (iph->protocol != IPPROTO_UDP) + goto out; - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; + len -= iph->ihl*4; + uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); + ulen = ntohs(uh->len); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip && np->local_ip != iph->daddr) - continue; - if (np->remote_ip && np->remote_ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; + if (ulen != len) + goto out; + if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) + goto out; + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->local_ip.ip && np->local_ip.ip != iph->daddr) + continue; + if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } + } else { +#if IS_ENABLED(CONFIG_IPV6) + const struct ipv6hdr *ip6h; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); - hits++; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + ip6h = (struct ipv6hdr *)skb->data; + if (ip6h->version != 6) + goto out; + len = ntohs(ip6h->payload_len); + if (!len) + goto out; + if (len + sizeof(struct ipv6hdr) > skb->len) + goto out; + if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr))) + goto out; + ip6h = ipv6_hdr(skb); + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + uh = udp_hdr(skb); + ulen = ntohs(uh->len); + if (ulen != skb->len) + goto out; + if (udp6_csum_init(skb, uh, IPPROTO_UDP)) + goto out; + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) + 
continue; + if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } +#endif } if (!hits) @@ -658,17 +902,44 @@ out: void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); - np_info(np, "local IP %pI4\n", &np->local_ip); + if (np->ipv6) + np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); + else + np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface '%s'\n", np->dev_name); np_info(np, "remote port %d\n", np->remote_port); - np_info(np, "remote IP %pI4\n", &np->remote_ip); + if (np->ipv6) + np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); + else + np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } EXPORT_SYMBOL(netpoll_print_options); +static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) +{ + const char *end; + + if (!strchr(str, ':') && + in4_pton(str, -1, (void *)addr, -1, &end) > 0) { + if (!*end) + return 0; + } + if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { +#if IS_ENABLED(CONFIG_IPV6) + if (!*end) + return 1; +#else + return -1; +#endif + } + return -1; +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; + int ipv6; if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) @@ -684,7 +955,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->local_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); + if (ipv6 < 0) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim; } cur++; @@ -716,7 +991,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - 
np->remote_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); + if (ipv6 < 0) + goto parse_failed; + else if (np->ipv6 != (bool)ipv6) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim + 1; if (*cur != 0) { @@ -744,6 +1025,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); + INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || !ndev->netdev_ops->ndo_poll_controller) { @@ -764,7 +1046,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); - skb_queue_head_init(&npinfo->arp_tx); + mutex_init(&npinfo->dev_lock); + skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -777,7 +1060,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto free_npinfo; } } else { - npinfo = ndev->npinfo; + npinfo = rtnl_dereference(ndev->npinfo); atomic_inc(&npinfo->refcnt); } @@ -808,14 +1091,19 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; int err; - if (np->dev_name) - ndev = dev_get_by_name(&init_net, np->dev_name); + rtnl_lock(); + if (np->dev_name) { + struct net *net = current->nsproxy->net_ns; + ndev = __dev_get_by_name(net, np->dev_name); + } if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); - return -ENODEV; + err = -ENODEV; + goto unlock; } + dev_hold(ndev); - if (ndev->master) { + if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; goto put; @@ -826,15 +1114,14 @@ int netpoll_setup(struct netpoll *np) np_info(np, "device %s not up yet, forcing it\n", np->dev_name); - rtnl_lock(); err = dev_open(ndev); - rtnl_unlock(); if (err) { np_err(np, "failed to open %s\n", ndev->name); goto put; } + rtnl_unlock(); 
atleast = jiffies + HZ/10; atmost = jiffies + carrier_timeout * HZ; while (!netif_carrier_ok(ndev)) { @@ -854,39 +1141,70 @@ int netpoll_setup(struct netpoll *np) np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n"); msleep(4000); } + rtnl_lock(); } - if (!np->local_ip) { - rcu_read_lock(); - in_dev = __in_dev_get_rcu(ndev); + if (!np->local_ip.ip) { + if (!np->ipv6) { + in_dev = __in_dev_get_rtnl(ndev); + + if (!in_dev || !in_dev->ifa_list) { + np_err(np, "no IP address for %s, aborting\n", + np->dev_name); + err = -EDESTADDRREQ; + goto put; + } + + np->local_ip.ip = in_dev->ifa_list->ifa_local; + np_info(np, "local IP %pI4\n", &np->local_ip.ip); + } else { +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *idev; - if (!in_dev || !in_dev->ifa_list) { - rcu_read_unlock(); - np_err(np, "no IP address for %s, aborting\n", - np->dev_name); err = -EDESTADDRREQ; + idev = __in6_dev_get(ndev); + if (idev) { + struct inet6_ifaddr *ifp; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) + continue; + np->local_ip.in6 = ifp->addr; + err = 0; + break; + } + read_unlock_bh(&idev->lock); + } + if (err) { + np_err(np, "no IPv6 address for %s, aborting\n", + np->dev_name); + goto put; + } else + np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); +#else + np_err(np, "IPv6 is not supported %s, aborting\n", + np->dev_name); + err = -EINVAL; goto put; +#endif } - - np->local_ip = in_dev->ifa_list->ifa_local; - rcu_read_unlock(); - np_info(np, "local IP %pI4\n", &np->local_ip); } /* fill up the skb queue */ refill_skbs(); - rtnl_lock(); err = __netpoll_setup(np, ndev, GFP_KERNEL); - rtnl_unlock(); - if (err) goto put; + rtnl_unlock(); return 0; put: dev_put(ndev); +unlock: + rtnl_unlock(); return err; } EXPORT_SYMBOL(netpoll_setup); @@ -894,6 +1212,7 @@ EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { skb_queue_head_init(&skb_pool); + 
init_srcu_struct(&netpoll_srcu); return 0; } core_initcall(netpoll_init); @@ -903,7 +1222,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->neigh_tx); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ @@ -921,7 +1240,11 @@ void __netpoll_cleanup(struct netpoll *np) struct netpoll_info *npinfo; unsigned long flags; - npinfo = np->dev->npinfo; + /* rtnl_dereference would be preferable here but + * rcu_cleanup_netpoll path can put us in here safely without + * holding the rtnl, so plain rcu_dereference it is + */ + npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; @@ -933,6 +1256,8 @@ void __netpoll_cleanup(struct netpoll *np) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } + synchronize_srcu(&netpoll_srcu); + if (atomic_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; @@ -940,25 +1265,27 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - RCU_INIT_POINTER(np->dev->npinfo, NULL); + rcu_assign_pointer(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); } } EXPORT_SYMBOL_GPL(__netpoll_cleanup); -static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +static void netpoll_async_cleanup(struct work_struct *work) { - struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); + struct netpoll *np = container_of(work, struct netpoll, cleanup_work); + rtnl_lock(); __netpoll_cleanup(np); + rtnl_unlock(); kfree(np); } -void __netpoll_free_rcu(struct netpoll *np) +void __netpoll_free_async(struct netpoll *np) { - call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); + schedule_work(&np->cleanup_work); } -EXPORT_SYMBOL_GPL(__netpoll_free_rcu); +EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { diff --git 
a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 5e67def..0777d0a 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -69,10 +69,8 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx) /* allocate & copy */ new = kzalloc(new_sz, GFP_KERNEL); - if (!new) { - pr_warn("Unable to alloc new priomap!\n"); + if (!new) return -ENOMEM; - } if (old) memcpy(new->priomap, old->priomap, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b29dacf..6048fc1 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -164,6 +164,7 @@ #ifdef CONFIG_XFRM #include <net/xfrm.h> #endif +#include <net/netns/generic.h> #include <asm/byteorder.h> #include <linux/rcupdate.h> #include <linux/bitops.h> @@ -212,7 +213,6 @@ #define PKTGEN_MAGIC 0xbe9be955 #define PG_PROC_DIR "pktgen" #define PGCTRL "pgctrl" -static struct proc_dir_entry *pg_proc_dir; #define MAX_CFLOWS 65536 @@ -397,7 +397,15 @@ struct pktgen_hdr { __be32 tv_usec; }; -static bool pktgen_exiting __read_mostly; + +static int pg_net_id __read_mostly; + +struct pktgen_net { + struct net *net; + struct proc_dir_entry *proc_dir; + struct list_head pktgen_threads; + bool pktgen_exiting; +}; struct pktgen_thread { spinlock_t if_lock; /* for list of devices */ @@ -414,6 +422,7 @@ struct pktgen_thread { wait_queue_head_t queue; struct completion start_done; + struct pktgen_net *net; }; #define REMOVE 1 @@ -428,9 +437,9 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname, bool exact); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -static void pktgen_run_all_threads(void); -static void pktgen_reset_all_threads(void); -static void pktgen_stop_all_threads_ifs(void); +static void pktgen_run_all_threads(struct pktgen_net *pn); +static void pktgen_reset_all_threads(struct pktgen_net *pn); +static void pktgen_stop_all_threads_ifs(struct 
pktgen_net *pn); static void pktgen_stop(struct pktgen_thread *t); static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); @@ -442,7 +451,6 @@ static int pg_clone_skb_d __read_mostly; static int debug __read_mostly; static DEFINE_MUTEX(pktgen_thread_lock); -static LIST_HEAD(pktgen_threads); static struct notifier_block pktgen_notifier_block = { .notifier_call = pktgen_device_event, @@ -464,6 +472,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, { int err = 0; char data[128]; + struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id); if (!capable(CAP_NET_ADMIN)) { err = -EPERM; @@ -480,13 +489,13 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, data[count - 1] = 0; /* Make string */ if (!strcmp(data, "stop")) - pktgen_stop_all_threads_ifs(); + pktgen_stop_all_threads_ifs(pn); else if (!strcmp(data, "start")) - pktgen_run_all_threads(); + pktgen_run_all_threads(pn); else if (!strcmp(data, "reset")) - pktgen_reset_all_threads(); + pktgen_reset_all_threads(pn); else pr_warning("Unknown command: %s\n", data); @@ -1781,10 +1790,13 @@ static ssize_t pktgen_thread_write(struct file *file, return -EFAULT; i += len; mutex_lock(&pktgen_thread_lock); - pktgen_add_device(t, f); + ret = pktgen_add_device(t, f); mutex_unlock(&pktgen_thread_lock); - ret = count; - sprintf(pg_result, "OK: add_device=%s", f); + if (!ret) { + ret = count; + sprintf(pg_result, "OK: add_device=%s", f); + } else + sprintf(pg_result, "ERROR: can not add device %s", f); goto out; } @@ -1824,13 +1836,14 @@ static const struct file_operations pktgen_thread_fops = { }; /* Think find or remove for NN */ -static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) +static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn, + const char *ifname, int remove) { struct pktgen_thread *t; struct pktgen_dev *pkt_dev = NULL; bool exact = (remove == FIND); - list_for_each_entry(t, &pktgen_threads, th_list) { + 
list_for_each_entry(t, &pn->pktgen_threads, th_list) { pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { @@ -1848,7 +1861,7 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) /* * mark a device for removal */ -static void pktgen_mark_device(const char *ifname) +static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname) { struct pktgen_dev *pkt_dev = NULL; const int max_tries = 10, msec_per_try = 125; @@ -1859,7 +1872,7 @@ static void pktgen_mark_device(const char *ifname) while (1) { - pkt_dev = __pktgen_NN_threads(ifname, REMOVE); + pkt_dev = __pktgen_NN_threads(pn, ifname, REMOVE); if (pkt_dev == NULL) break; /* success */ @@ -1880,21 +1893,21 @@ static void pktgen_mark_device(const char *ifname) mutex_unlock(&pktgen_thread_lock); } -static void pktgen_change_name(struct net_device *dev) +static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *dev) { struct pktgen_thread *t; - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; - remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); + remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); pkt_dev->entry = proc_create_data(dev->name, 0600, - pg_proc_dir, + pn->proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) @@ -1909,8 +1922,9 @@ static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); - if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting) + if (pn->pktgen_exiting) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -1919,18 +1933,19 @@ static int pktgen_device_event(struct notifier_block *unused, switch (event) { case NETDEV_CHANGENAME: - pktgen_change_name(dev); + 
pktgen_change_name(pn, dev); break; case NETDEV_UNREGISTER: - pktgen_mark_device(dev->name); + pktgen_mark_device(pn, dev->name); break; } return NOTIFY_DONE; } -static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, +static struct net_device *pktgen_dev_get_by_name(const struct pktgen_net *pn, + struct pktgen_dev *pkt_dev, const char *ifname) { char b[IFNAMSIZ+5]; @@ -1944,13 +1959,14 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, } b[i] = 0; - return dev_get_by_name(&init_net, b); + return dev_get_by_name(pn->net, b); } /* Associate pktgen_dev with a device. */ -static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) +static int pktgen_setup_dev(const struct pktgen_net *pn, + struct pktgen_dev *pkt_dev, const char *ifname) { struct net_device *odev; int err; @@ -1961,7 +1977,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) pkt_dev->odev = NULL; } - odev = pktgen_dev_get_by_name(pkt_dev, ifname); + odev = pktgen_dev_get_by_name(pn, pkt_dev, ifname); if (!odev) { pr_err("no such netdevice: \"%s\"\n", ifname); return -ENODEV; @@ -2203,9 +2219,10 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { struct xfrm_state *x = pkt_dev->flows[flow].x; + struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find(&init_net, DUMMY_MARK, + x = xfrm_stateonly_find(pn->net, DUMMY_MARK, (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, @@ -2912,7 +2929,7 @@ static void pktgen_run(struct pktgen_thread *t) t->control &= ~(T_STOP); } -static void pktgen_stop_all_threads_ifs(void) +static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -2920,7 +2937,7 @@ static void pktgen_stop_all_threads_ifs(void) mutex_lock(&pktgen_thread_lock); - 
list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= T_STOP; mutex_unlock(&pktgen_thread_lock); @@ -2956,28 +2973,28 @@ signal: return 0; } -static int pktgen_wait_all_threads_run(void) +static int pktgen_wait_all_threads_run(struct pktgen_net *pn) { struct pktgen_thread *t; int sig = 1; mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { sig = pktgen_wait_thread_run(t); if (sig == 0) break; } if (sig == 0) - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_STOP); mutex_unlock(&pktgen_thread_lock); return sig; } -static void pktgen_run_all_threads(void) +static void pktgen_run_all_threads(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -2985,7 +3002,7 @@ static void pktgen_run_all_threads(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_RUN); mutex_unlock(&pktgen_thread_lock); @@ -2993,10 +3010,10 @@ static void pktgen_run_all_threads(void) /* Propagate thread->control */ schedule_timeout_interruptible(msecs_to_jiffies(125)); - pktgen_wait_all_threads_run(); + pktgen_wait_all_threads_run(pn); } -static void pktgen_reset_all_threads(void) +static void pktgen_reset_all_threads(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -3004,7 +3021,7 @@ static void pktgen_reset_all_threads(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_REMDEVALL); mutex_unlock(&pktgen_thread_lock); @@ -3012,7 +3029,7 @@ static void pktgen_reset_all_threads(void) /* Propagate thread->control */ schedule_timeout_interruptible(msecs_to_jiffies(125)); - pktgen_wait_all_threads_run(); + pktgen_wait_all_threads_run(pn); } static void 
show_results(struct pktgen_dev *pkt_dev, int nr_frags) @@ -3154,9 +3171,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) static void pktgen_rem_thread(struct pktgen_thread *t) { /* Remove from the thread list */ - - remove_proc_entry(t->tsk->comm, pg_proc_dir); - + remove_proc_entry(t->tsk->comm, t->net->proc_dir); } static void pktgen_resched(struct pktgen_dev *pkt_dev) @@ -3302,7 +3317,7 @@ static int pktgen_thread_worker(void *arg) pkt_dev = next_to_run(t); if (unlikely(!pkt_dev && t->control == 0)) { - if (pktgen_exiting) + if (t->net->pktgen_exiting) break; wait_event_interruptible_timeout(t->queue, t->control != 0, @@ -3424,7 +3439,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) /* We don't allow a device to be on several threads */ - pkt_dev = __pktgen_NN_threads(ifname, FIND); + pkt_dev = __pktgen_NN_threads(t->net, ifname, FIND); if (pkt_dev) { pr_err("ERROR: interface already used\n"); return -EBUSY; @@ -3459,13 +3474,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_id = 0xffff; pkt_dev->node = -1; - err = pktgen_setup_dev(pkt_dev, ifname); + err = pktgen_setup_dev(t->net, pkt_dev, ifname); if (err) goto out1; if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING) pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, + pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) { pr_err("cannot create %s/%s procfs entry\n", @@ -3490,7 +3505,7 @@ out1: return err; } -static int __init pktgen_create_thread(int cpu) +static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) { struct pktgen_thread *t; struct proc_dir_entry *pe; @@ -3508,7 +3523,7 @@ static int __init pktgen_create_thread(int cpu) INIT_LIST_HEAD(&t->if_list); - list_add_tail(&t->th_list, &pktgen_threads); + list_add_tail(&t->th_list, &pn->pktgen_threads); init_completion(&t->start_done); p = 
kthread_create_on_node(pktgen_thread_worker, @@ -3524,7 +3539,7 @@ static int __init pktgen_create_thread(int cpu) kthread_bind(p, cpu); t->tsk = p; - pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, + pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir, &pktgen_thread_fops, t); if (!pe) { pr_err("cannot create %s/%s procfs entry\n", @@ -3535,6 +3550,7 @@ static int __init pktgen_create_thread(int cpu) return -EINVAL; } + t->net = pn; wake_up_process(p); wait_for_completion(&t->start_done); @@ -3560,6 +3576,7 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t, static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) { + struct pktgen_net *pn = t->net; pr_debug("remove_device pkt_dev=%p\n", pkt_dev); @@ -3580,7 +3597,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, _rem_dev_from_if_list(t, pkt_dev); if (pkt_dev->entry) - remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); + remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3592,63 +3609,63 @@ static int pktgen_remove_device(struct pktgen_thread *t, return 0; } -static int __init pg_init(void) +static int __net_init pg_net_init(struct net *net) { - int cpu; + struct pktgen_net *pn = net_generic(net, pg_net_id); struct proc_dir_entry *pe; - int ret = 0; - - pr_info("%s", version); - - pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); - if (!pg_proc_dir) + int cpu, ret = 0; + + pn->net = net; + INIT_LIST_HEAD(&pn->pktgen_threads); + pn->pktgen_exiting = false; + pn->proc_dir = proc_mkdir(PG_PROC_DIR, pn->net->proc_net); + if (!pn->proc_dir) { + pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR); return -ENODEV; - - pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); + } + pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops); if (pe == NULL) { - pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); + pr_err("cannot create %s procfs entry\n", PGCTRL); ret = -EINVAL; - goto remove_dir; + 
goto remove; } - register_netdevice_notifier(&pktgen_notifier_block); - for_each_online_cpu(cpu) { int err; - err = pktgen_create_thread(cpu); + err = pktgen_create_thread(cpu, pn); if (err) - pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n", + pr_warn("Cannot create thread for cpu %d (%d)\n", cpu, err); } - if (list_empty(&pktgen_threads)) { - pr_err("ERROR: Initialization failed for all threads\n"); + if (list_empty(&pn->pktgen_threads)) { + pr_err("Initialization failed for all threads\n"); ret = -ENODEV; - goto unregister; + goto remove_entry; } return 0; - unregister: - unregister_netdevice_notifier(&pktgen_notifier_block); - remove_proc_entry(PGCTRL, pg_proc_dir); - remove_dir: - proc_net_remove(&init_net, PG_PROC_DIR); +remove_entry: + remove_proc_entry(PGCTRL, pn->proc_dir); +remove: + remove_proc_entry(PG_PROC_DIR, pn->net->proc_net); return ret; } -static void __exit pg_cleanup(void) +static void __net_exit pg_net_exit(struct net *net) { + struct pktgen_net *pn = net_generic(net, pg_net_id); struct pktgen_thread *t; struct list_head *q, *n; LIST_HEAD(list); /* Stop all interfaces & threads */ - pktgen_exiting = true; + pn->pktgen_exiting = true; mutex_lock(&pktgen_thread_lock); - list_splice_init(&pktgen_threads, &list); + list_splice_init(&pn->pktgen_threads, &list); mutex_unlock(&pktgen_thread_lock); list_for_each_safe(q, n, &list) { @@ -3658,12 +3675,36 @@ static void __exit pg_cleanup(void) kfree(t); } - /* Un-register us from receiving netdevice events */ - unregister_netdevice_notifier(&pktgen_notifier_block); + remove_proc_entry(PGCTRL, pn->proc_dir); + remove_proc_entry(PG_PROC_DIR, pn->net->proc_net); +} + +static struct pernet_operations pg_net_ops = { + .init = pg_net_init, + .exit = pg_net_exit, + .id = &pg_net_id, + .size = sizeof(struct pktgen_net), +}; + +static int __init pg_init(void) +{ + int ret = 0; - /* Clean up proc file system */ - remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(&init_net, PG_PROC_DIR); + 
pr_info("%s", version); + ret = register_pernet_subsys(&pg_net_ops); + if (ret) + return ret; + ret = register_netdevice_notifier(&pktgen_notifier_block); + if (ret) + unregister_pernet_subsys(&pg_net_ops); + + return ret; +} + +static void __exit pg_cleanup(void) +{ + unregister_netdevice_notifier(&pktgen_notifier_block); + unregister_pernet_subsys(&pg_net_ops); } module_init(pg_init); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index c31d9e8..4425148 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -186,8 +186,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, struct fastopen_queue *fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; - BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk)); - tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); fastopenq->qlen--; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1868625..23854b5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -496,8 +496,10 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) } if (ops->fill_info) { data = nla_nest_start(skb, IFLA_INFO_DATA); - if (data == NULL) + if (data == NULL) { + err = -EMSGSIZE; goto err_cancel_link; + } err = ops->fill_info(skb, dev); if (err < 0) goto err_cancel_data; @@ -780,6 +782,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_CARRIER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ @@ -879,6 +882,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, const struct rtnl_link_stats64 *stats; struct nlattr *attr, *af_spec; struct rtnl_af_ops *af_ops; + struct net_device *upper_dev = netdev_master_upper_dev_get(dev); ASSERT_RTNL(); nlh = nlmsg_put(skb, 
pid, seq, type, sizeof(*ifm), flags); @@ -907,8 +911,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #endif (dev->ifindex != dev->iflink && nla_put_u32(skb, IFLA_LINK, dev->iflink)) || - (dev->master && - nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (upper_dev && + nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || + nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || (dev->ifalias && @@ -976,6 +981,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, * report anything. */ ivi.spoofchk = -1; + memset(ivi.mac, 0, sizeof(ivi.mac)); if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; vf_mac.vf = @@ -1057,7 +1063,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0, s_idx; struct net_device *dev; struct hlist_head *head; - struct hlist_node *node; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; @@ -1067,7 +1072,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) @@ -1077,7 +1082,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -1108,6 +1113,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, [IFLA_MASTER] = { .type = NLA_U32 }, + [IFLA_CARRIER] = { .type = NLA_U8 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = 
NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1270,16 +1276,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) static int do_set_master(struct net_device *dev, int ifindex) { - struct net_device *master_dev; + struct net_device *upper_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops; int err; - if (dev->master) { - if (dev->master->ifindex == ifindex) + if (upper_dev) { + if (upper_dev->ifindex == ifindex) return 0; - ops = dev->master->netdev_ops; + ops = upper_dev->netdev_ops; if (ops->ndo_del_slave) { - err = ops->ndo_del_slave(dev->master, dev); + err = ops->ndo_del_slave(upper_dev, dev); if (err) return err; } else { @@ -1288,12 +1294,12 @@ static int do_set_master(struct net_device *dev, int ifindex) } if (ifindex) { - master_dev = __dev_get_by_index(dev_net(dev), ifindex); - if (!master_dev) + upper_dev = __dev_get_by_index(dev_net(dev), ifindex); + if (!upper_dev) return -EINVAL; - ops = master_dev->netdev_ops; + ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { - err = ops->ndo_add_slave(master_dev, dev); + err = ops->ndo_add_slave(upper_dev, dev); if (err) return err; } else { @@ -1307,7 +1313,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { const struct net_device_ops *ops = dev->netdev_ops; - int send_addr_notify = 0; int err; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { @@ -1360,16 +1365,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct sockaddr *sa; int len; - if (!ops->ndo_set_mac_address) { - err = -EOPNOTSUPP; - goto errout; - } - - if (!netif_device_present(dev)) { - err = -ENODEV; - goto errout; - } - len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { @@ -1379,13 +1374,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = 
ops->ndo_set_mac_address(dev, sa); + err = dev_set_mac_address(dev, sa); kfree(sa); if (err) goto errout; - send_addr_notify = 1; modified = 1; - add_device_randomness(dev->dev_addr, dev->addr_len); } if (tb[IFLA_MTU]) { @@ -1422,7 +1415,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); - send_addr_notify = 1; + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); } if (ifm->ifi_flags || ifm->ifi_change) { @@ -1438,6 +1431,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_CARRIER]) { + err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); @@ -1536,9 +1536,6 @@ errout: net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", dev->name); - if (send_addr_notify) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return err; } @@ -1672,9 +1669,11 @@ struct net_device *rtnl_create_link(struct net *net, if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); - if (tb[IFLA_ADDRESS]) + if (tb[IFLA_ADDRESS]) { memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), nla_len(tb[IFLA_ADDRESS])); + dev->addr_assign_type = NET_ADDR_SET; + } if (tb[IFLA_BROADCAST]) memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), nla_len(tb[IFLA_BROADCAST])); @@ -1923,7 +1922,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; - if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1992,6 +1991,7 @@ errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_LINK, 
err); } +EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, @@ -2054,16 +2054,12 @@ errout: static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); - struct net_device *master = NULL; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; u8 *addr; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) return err; @@ -2096,10 +2092,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && (dev->priv_flags & IFF_BRIDGE_PORT)) { - master = dev->master; - err = master->netdev_ops->ndo_fdb_add(ndm, tb, - dev, addr, - nlh->nlmsg_flags); + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + const struct net_device_ops *ops = br_dev->netdev_ops; + + err = ops->ndo_fdb_add(ndm, tb, dev, addr, nlh->nlmsg_flags); if (err) goto out; else @@ -2125,7 +2121,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; - struct nlattr *llattr; + struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; int err = -EINVAL; __u8 *addr; @@ -2133,8 +2129,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (nlmsg_len(nlh) < sizeof(*ndm)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { @@ -2148,22 +2145,27 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; } - llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); - if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { - pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid 
address\n"); + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); return -EINVAL; } - addr = nla_data(llattr); err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && (dev->priv_flags & IFF_BRIDGE_PORT)) { - struct net_device *master = dev->master; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + const struct net_device_ops *ops = br_dev->netdev_ops; - if (master->netdev_ops->ndo_fdb_del) - err = master->netdev_ops->ndo_fdb_del(ndm, dev, addr); + if (ops->ndo_fdb_del) + err = ops->ndo_fdb_del(ndm, tb, dev, addr); if (err) goto out; @@ -2173,7 +2175,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); @@ -2247,9 +2249,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (dev->priv_flags & IFF_BRIDGE_PORT) { - struct net_device *master = dev->master; - const struct net_device_ops *ops = master->netdev_ops; + struct net_device *br_dev; + const struct net_device_ops *ops; + br_dev = netdev_master_upper_dev_get(dev); + ops = br_dev->netdev_ops; if (ops->ndo_fdb_dump) idx = ops->ndo_fdb_dump(skb, cb, dev, idx); } @@ -2270,6 +2274,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct ifinfomsg *ifm; struct nlattr *br_afspec; u8 operstate = netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); if (nlh == NULL) @@ -2287,8 +2292,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || nla_put_u8(skb, IFLA_OPERSTATE, operstate) || - (dev->master && - nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (br_dev && + nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || (dev->ifindex != dev->iflink && @@ -2320,23 +2325,31 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; + struct nlattr *extfilt; + u32 filter_mask = 0; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + IFLA_EXT_MASK); + if (extfilt) + filter_mask = nla_get_u32(extfilt); rcu_read_lock(); for_each_netdev_rcu(net, dev) { const struct net_device_ops *ops = dev->netdev_ops; - struct net_device *master = dev->master; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); - if (master && master->netdev_ops->ndo_bridge_getlink) { + if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - master->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev) < 0) + br_dev->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev, filter_mask) < 0) break; idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + ops->ndo_bridge_getlink(skb, portid, seq, dev, + filter_mask) < 0) break; idx++; } @@ -2365,7 +2378,7 @@ static inline size_t bridge_nlmsg_size(void) static int rtnl_bridge_notify(struct net_device *dev, u16 flags) { struct net *net = dev_net(dev); - struct net_device *master = dev->master; + struct net_device *br_dev = 
netdev_master_upper_dev_get(dev); struct sk_buff *skb; int err = -EOPNOTSUPP; @@ -2376,15 +2389,15 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) } if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && - master && master->netdev_ops->ndo_bridge_getlink) { - err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { + err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } if ((flags & BRIDGE_FLAGS_SELF) && dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } @@ -2436,13 +2449,14 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, oflags = flags; if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { - if (!dev->master || - !dev->master->netdev_ops->ndo_bridge_setlink) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) { err = -EOPNOTSUPP; goto out; } - err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); if (err) goto out; @@ -2468,6 +2482,77 @@ out: return err; } +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, 
br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { + err = -EOPNOTSUPP; + goto out; + } + + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_dellink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2538,7 +2623,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); while (RTA_OK(attr, attrlen)) { - unsigned int flavor = attr->rta_type; + unsigned int flavor = attr->rta_type & NLA_TYPE_MASK; if (flavor) { if (flavor > rta_max[sz_idx]) return -EINVAL; @@ -2651,6 +2736,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } diff --git a/net/core/scm.c b/net/core/scm.c index 57fb1ee..2dc6cda 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> +#include <linux/pid_namespace.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include 
<linux/slab.h> @@ -35,6 +36,7 @@ #include <net/sock.h> #include <net/compat.h> #include <net/scm.h> +#include <net/cls_cgroup.h> /* @@ -51,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || @@ -302,8 +305,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) } /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); - if (sock) + if (sock) { sock_update_netprioidx(sock->sk, current); + sock_update_classid(sock->sk, current); + } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3ab989b..33245ef 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -104,47 +104,37 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { .get = sock_pipe_buf_get, }; -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. - */ - /** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. + * skb_panic - private function for out-of-line support + * @skb: buffer + * @sz: size + * @addr: address + * @msg: skb_over_panic or skb_under_panic + * + * Out-of-line support for skb_put() and skb_push(). + * Called via the wrapper skb_over_panic() or skb_under_panic(). + * Keep out of line to prevent kernel bloat. + * __builtin_return_address is not used because it is not always reliable. 
*/ -static void skb_over_panic(struct sk_buff *skb, int sz, void *here) +static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, + const char msg[]) { pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, + msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); BUG(); } -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - -static void skb_under_panic(struct sk_buff *skb, int sz, void *here) +static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) { - pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : "<NULL>"); - BUG(); + skb_panic(skb, sz, addr, __func__); } +static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) +{ + skb_panic(skb, sz, addr, __func__); +} /* * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells @@ -155,8 +145,9 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) */ #define kmalloc_reserve(size, gfp, node, pfmemalloc) \ __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) -void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, - bool *pfmemalloc) + +static void *__kmalloc_reserve(size_t size, gfp_t flags, int node, + unsigned long ip, bool *pfmemalloc) { void *obj; bool ret_pfmemalloc = false; @@ -259,6 +250,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -327,6 +319,7 @@ struct sk_buff 
*build_skb(void *data, unsigned int frag_size) skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -348,10 +341,6 @@ struct netdev_alloc_cache { }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; @@ -683,7 +672,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; new->inner_transport_header = old->inner_transport_header; - new->inner_network_header = old->inner_transport_header; + new->inner_network_header = old->inner_network_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; @@ -1649,7 +1638,7 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) static struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, - struct sk_buff *skb, struct sock *sk) + struct sock *sk) { struct page_frag *pfrag = sk_page_frag(sk); @@ -1682,14 +1671,14 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd, static bool spd_fill_page(struct splice_pipe_desc *spd, struct pipe_inode_info *pipe, struct page *page, unsigned int *len, unsigned int offset, - struct sk_buff *skb, bool linear, + bool linear, struct sock *sk) { if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) return true; if (linear) { - page = linear_to_page(page, len, &offset, skb, sk); + page = linear_to_page(page, len, &offset, sk); if (!page) return true; } @@ -1706,23 +1695,9 @@ static bool spd_fill_page(struct splice_pipe_desc *spd, return false; } -static inline void 
__segment_seek(struct page **page, unsigned int *poff, - unsigned int *plen, unsigned int off) -{ - unsigned long n; - - *poff += off; - n = *poff / PAGE_SIZE; - if (n) - *page = nth_page(*page, n); - - *poff = *poff % PAGE_SIZE; - *plen -= off; -} - static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, - unsigned int *len, struct sk_buff *skb, + unsigned int *len, struct splice_pipe_desc *spd, bool linear, struct sock *sk, struct pipe_inode_info *pipe) @@ -1737,23 +1712,19 @@ static bool __splice_segment(struct page *page, unsigned int poff, } /* ignore any bits we already processed */ - if (*off) { - __segment_seek(&page, &poff, &plen, *off); - *off = 0; - } + poff += *off; + plen -= *off; + *off = 0; do { unsigned int flen = min(*len, plen); - /* the linear region may spread across several pages */ - flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - - if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) + if (spd_fill_page(spd, pipe, page, &flen, poff, + linear, sk)) return true; - - __segment_seek(&page, &poff, &plen, flen); + poff += flen; + plen -= flen; *len -= flen; - } while (*len && plen); return false; @@ -1777,7 +1748,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, + offset, len, spd, skb_head_is_locked(skb), sk, pipe)) return true; @@ -1790,7 +1761,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(skb_frag_page(f), f->page_offset, skb_frag_size(f), - offset, len, skb, spd, false, sk, pipe)) + offset, len, spd, false, sk, pipe)) return true; } @@ -2355,6 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); + skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; if (len < pos) /* 
Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2686,48 +2658,37 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int len, int odd, struct sk_buff *skb), void *from, int length) { - int frg_cnt = 0; - skb_frag_t *frag = NULL; - struct page *page = NULL; - int copy, left; + int frg_cnt = skb_shinfo(skb)->nr_frags; + int copy; int offset = 0; int ret; + struct page_frag *pfrag = ¤t->task_frag; do { /* Return error if we don't have space for new frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; if (frg_cnt >= MAX_SKB_FRAGS) - return -EFAULT; + return -EMSGSIZE; - /* allocate a new page for next frag */ - page = alloc_pages(sk->sk_allocation, 0); - - /* If alloc_page fails just return failure and caller will - * free previous allocated pages by doing kfree_skb() - */ - if (page == NULL) + if (!sk_page_frag_refill(sk, pfrag)) return -ENOMEM; - /* initialize the next frag */ - skb_fill_page_desc(skb, frg_cnt, page, 0, 0); - skb->truesize += PAGE_SIZE; - atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); - - /* get the new initialized frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; - frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; - /* copy the user data to page */ - left = PAGE_SIZE - frag->page_offset; - copy = (length > left)? 
left : length; + copy = min_t(int, length, pfrag->size - pfrag->offset); - ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag), - offset, copy, 0, skb); + ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, + offset, copy, 0, skb); if (ret < 0) return -EFAULT; /* copy was successful so update the size parameters */ - skb_frag_size_add(frag, copy); + skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, + copy); + frg_cnt++; + pfrag->offset += copy; + get_page(pfrag->page); + + skb->truesize += copy; + atomic_add(copy, &sk->sk_wmem_alloc); skb->len += copy; skb->data_len += copy; offset += copy; @@ -2777,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; + unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; int sg = !!(features & NETIF_F_SG); @@ -2853,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, nskb->data, doffset); + + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) continue; @@ -2871,6 +2836,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); + skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; __skb_frag_ref(frag); diff --git a/net/core/sock.c b/net/core/sock.c index a692ef4..b261a79 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -186,8 +186,10 @@ void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) static struct 
lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; +#if defined(CONFIG_MEMCG_KMEM) struct static_key memcg_socket_limit_enabled; EXPORT_SYMBOL(memcg_socket_limit_enabled); +#endif /* * Make lock validator output more readable. (we pre-construct these @@ -583,7 +585,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, goto out; retry: - seq = read_seqbegin(&devnet_rename_seq); + seq = read_seqcount_begin(&devnet_rename_seq); rcu_read_lock(); dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); ret = -ENODEV; @@ -594,7 +596,7 @@ retry: strcpy(devname, dev->name); rcu_read_unlock(); - if (read_seqretry(&devnet_rename_seq, seq)) + if (read_seqcount_retry(&devnet_rename_seq, seq)) goto retry; len = strlen(devname) + 1; @@ -665,6 +667,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_REUSEADDR: sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); break; + case SO_REUSEPORT: + sk->sk_reuseport = valbool; + break; case SO_TYPE: case SO_PROTOCOL: case SO_DOMAIN: @@ -861,6 +866,13 @@ set_rcvbuf: ret = sk_detach_filter(sk); break; + case SO_LOCK_FILTER: + if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool) + ret = -EPERM; + else + sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool); + break; + case SO_PASSSEC: if (valbool) set_bit(SOCK_PASSSEC, &sock->flags); @@ -965,6 +977,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_reuse; break; + case SO_REUSEPORT: + v.val = sk->sk_reuseport; + break; + case SO_KEEPALIVE: v.val = sock_flag(sk, SOCK_KEEPOPEN); break; @@ -1140,6 +1156,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; + case SO_LOCK_FILTER: + v.val = sock_flag(sk, SOCK_FILTER_LOCKED); + break; + default: return -ENOPROTOOPT; } @@ -2212,7 +2232,7 @@ EXPORT_SYMBOL(sk_reset_timer); void sk_stop_timer(struct sock *sk, struct timer_list* timer) { - if (timer_pending(timer) && del_timer(timer)) + if 
(del_timer(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer); @@ -2818,7 +2838,7 @@ static const struct file_operations proto_seq_fops = { static __net_init int proto_init_net(struct net *net) { - if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) + if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops)) return -ENOMEM; return 0; @@ -2826,7 +2846,7 @@ static __net_init int proto_init_net(struct net *net) static __net_exit void proto_exit_net(struct net *net) { - proc_net_remove(net, "protocols"); + remove_proc_entry("protocols", net->proc_net); } diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 602cd63..a29e90c 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -97,21 +97,6 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static const inline struct sock_diag_handler *sock_diag_lock_handler(int family) -{ - if (sock_diag_handlers[family] == NULL) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, family); - - mutex_lock(&sock_diag_table_mutex); - return sock_diag_handlers[family]; -} - -static inline void sock_diag_unlock_handler(const struct sock_diag_handler *h) -{ - mutex_unlock(&sock_diag_table_mutex); -} - static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; @@ -121,12 +106,20 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlmsg_len(nlh) < sizeof(*req)) return -EINVAL; - hndl = sock_diag_lock_handler(req->sdiag_family); + if (req->sdiag_family >= AF_MAX) + return -EINVAL; + + if (sock_diag_handlers[req->sdiag_family] == NULL) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, req->sdiag_family); + + mutex_lock(&sock_diag_table_mutex); + hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; else err = hndl->dump(skb, nlh); - sock_diag_unlock_handler(hndl); + 
mutex_unlock(&sock_diag_table_mutex); return err; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d1b0804..cfdb46a 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -20,6 +20,8 @@ #include <net/sock.h> #include <net/net_ratelimit.h> +static int one = 1; + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -92,28 +94,32 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "rmem_max", .data = &sysctl_rmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "wmem_default", .data = &sysctl_wmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "rmem_default", .data = &sysctl_rmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "dev_weight", diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 1b588e2..21291f1 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -284,6 +284,7 @@ static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->getpermhwaddr) return -EOPNOTSUPP; + memset(perm_addr, 0, sizeof(perm_addr)); netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr); return nla_put(skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr), perm_addr); @@ -1042,6 +1043,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_getets) { struct ieee_ets ets; + memset(&ets, 0, sizeof(ets)); err = ops->ieee_getets(netdev, &ets); if (!err && nla_put(skb, DCB_ATTR_IEEE_ETS, sizeof(ets), &ets)) @@ -1050,6 +1052,7 @@ 
static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_getmaxrate) { struct ieee_maxrate maxrate; + memset(&maxrate, 0, sizeof(maxrate)); err = ops->ieee_getmaxrate(netdev, &maxrate); if (!err) { err = nla_put(skb, DCB_ATTR_IEEE_MAXRATE, @@ -1061,6 +1064,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_getpfc) { struct ieee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); err = ops->ieee_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc)) @@ -1094,6 +1098,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) /* get peer info if available */ if (ops->ieee_peer_getets) { struct ieee_ets ets; + memset(&ets, 0, sizeof(ets)); err = ops->ieee_peer_getets(netdev, &ets); if (!err && nla_put(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets)) @@ -1102,6 +1107,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_peer_getpfc) { struct ieee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); err = ops->ieee_peer_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc)) @@ -1280,6 +1286,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) /* peer info if available */ if (ops->cee_peer_getpg) { struct cee_pg pg; + memset(&pg, 0, sizeof(pg)); err = ops->cee_peer_getpg(netdev, &pg); if (!err && nla_put(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg)) @@ -1288,6 +1295,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->cee_peer_getpfc) { struct cee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); err = ops->cee_peer_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc)) diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index b75968a..8c0ef71 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -1,6 +1,6 @@ menuconfig IP_DCCP - tristate "The DCCP Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + 
tristate "The DCCP Protocol" + depends on INET ---help--- Datagram Congestion Control Protocol (RFC 4340) diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 0581143..8ba3fc9 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -1,5 +1,4 @@ -menu "DCCP CCIDs Configuration (EXPERIMENTAL)" - depends on EXPERIMENTAL +menu "DCCP CCIDs Configuration" config IP_DCCP_CCID2_DEBUG bool "CCID-2 debugging messages" @@ -12,7 +11,7 @@ config IP_DCCP_CCID2_DEBUG If in doubt, say N. config IP_DCCP_CCID3 - bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)" + bool "CCID-3 (TCP-Friendly)" def_bool y if (IP_DCCP = y || IP_DCCP = m) ---help--- CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 0a8d6eb..4c6bdf9 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -171,7 +171,7 @@ static __init int dccpprobe_init(void) spin_lock_init(&dccpw.lock); if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) return ret; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) goto err0; ret = setup_jprobe(); @@ -181,7 +181,7 @@ static __init int dccpprobe_init(void) pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); err0: kfifo_free(&dccpw.fifo); return ret; @@ -191,7 +191,7 @@ module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { kfifo_free(&dccpw.fifo); - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&dccp_send_probe); } diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig index 7914fd6..f3393e1 100644 --- a/net/decnet/Kconfig +++ b/net/decnet/Kconfig @@ -25,8 +25,8 @@ config DECNET The module is called decnet. 
config DECNET_ROUTER - bool "DECnet: router support (EXPERIMENTAL)" - depends on DECNET && EXPERIMENTAL + bool "DECnet: router support" + depends on DECNET select FIB_RULES ---help--- Add support for turning your DECnet Endnode into a level 1 or 2 diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 307c322..c21f200 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -175,12 +175,11 @@ static struct hlist_head *dn_find_list(struct sock *sk) static int check_port(__le16 port) { struct sock *sk; - struct hlist_node *node; if (port == 0) return -1; - sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { struct dn_scp *scp = DN_SK(sk); if (scp->addrloc == port) return -1; @@ -374,11 +373,10 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) { struct hlist_head *list = listen_hash(addr); - struct hlist_node *node; struct sock *sk; read_lock(&dn_hash_lock); - sk_for_each(sk, node, list) { + sk_for_each(sk, list) { struct dn_scp *scp = DN_SK(sk); if (sk->sk_state != TCP_LISTEN) continue; @@ -414,11 +412,10 @@ struct sock *dn_find_by_skb(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk; - struct hlist_node *node; struct dn_scp *scp; read_lock(&dn_hash_lock); - sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { scp = DN_SK(sk); if (cb->src != dn_saddr2dn(&scp->peer)) continue; @@ -909,6 +906,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, struct dn_scp *scp = DN_SK(sk); int err = -EISCONN; struct flowidn fld; + struct dst_entry *dst; if (sock->state == SS_CONNECTED) goto out; @@ -955,10 +953,11 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, 
fld.flowidn_proto = DNPROTO_NSP; if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0) goto out; - sk->sk_route_caps = sk->sk_dst_cache->dev->features; + dst = __sk_dst_get(sk); + sk->sk_route_caps = dst->dev->features; sock->state = SS_CONNECTING; scp->state = DN_CI; - scp->segsize_loc = dst_metric_advmss(sk->sk_dst_cache); + scp->segsize_loc = dst_metric_advmss(dst); dn_nsp_send_conninit(sk, NSP_CI); err = -EINPROGRESS; @@ -2382,7 +2381,7 @@ static int __init decnet_init(void) dev_add_pack(&dn_dix_packet_type); register_netdevice_notifier(&dn_dev_notifier); - proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops); + proc_create("decnet", S_IRUGO, init_net.proc_net, &dn_socket_seq_fops); dn_register_sysctl(); out: return rc; @@ -2411,7 +2410,7 @@ static void __exit decnet_exit(void) dn_neigh_cleanup(); dn_fib_cleanup(); - proc_net_remove(&init_net, "decnet"); + remove_proc_entry("decnet", init_net.proc_net); proto_unregister(&dn_proto); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index e47ba9f..c8da116 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1412,7 +1412,7 @@ void __init dn_dev_init(void) rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL); rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL); - proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); + proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops); #ifdef CONFIG_SYSCTL { @@ -1433,7 +1433,7 @@ void __exit dn_dev_cleanup(void) } #endif /* CONFIG_SYSCTL */ - proc_net_remove(&init_net, "decnet_dev"); + remove_proc_entry("decnet_dev", init_net.proc_net); dn_dev_devices_off(); } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 3aede1b..f8637f9 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -95,7 +95,7 @@ static u32 dn_neigh_hash(const void *pkey, struct neigh_table dn_neigh_table = { .family = PF_DECnet, - .entry_size = sizeof(struct dn_neigh), + 
.entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)), .key_len = sizeof(__le16), .hash = dn_neigh_hash, .constructor = dn_neigh_construct, @@ -590,11 +590,12 @@ static const struct file_operations dn_neigh_seq_fops = { void __init dn_neigh_init(void) { neigh_table_init(&dn_neigh_table); - proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); + proc_create("decnet_neigh", S_IRUGO, init_net.proc_net, + &dn_neigh_seq_fops); } void __exit dn_neigh_cleanup(void) { - proc_net_remove(&init_net, "decnet_neigh"); + remove_proc_entry("decnet_neigh", init_net.proc_net); neigh_table_clear(&dn_neigh_table); } diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 8a96047c..1aaa51e 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -598,7 +598,7 @@ void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, if (reason == 0) reason = le16_to_cpu(scp->discdata_out.opt_status); - dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl, + dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl, scp->discdata_out.opt_data, scp->addrrem, scp->addrloc); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b57419c..5ac0e15 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1282,7 +1282,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int return err; } -int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags) +int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags) { int err; @@ -1901,7 +1901,8 @@ void __init dn_route_init(void) dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); + proc_create("decnet_cache", S_IRUGO, init_net.proc_net, + &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, @@ -1917,7 +1918,7 @@ void __exit 
dn_route_cleanup(void) del_timer(&dn_route_timer); dn_run_flush(0); - proc_net_remove(&init_net, "decnet_cache"); + remove_proc_entry("decnet_cache", init_net.proc_net); dst_entries_destroy(&dn_dst_ops); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index f968c1b..6c2445b 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -483,7 +483,6 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) unsigned int h, s_h; unsigned int e = 0, s_e; struct dn_fib_table *tb; - struct hlist_node *node; int dumped = 0; if (!net_eq(net, &init_net)) @@ -498,7 +497,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { e = 0; - hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) { + hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) { if (e < s_e) goto next; if (dumped) @@ -828,7 +827,6 @@ out: struct dn_fib_table *dn_fib_get_table(u32 n, int create) { struct dn_fib_table *t; - struct hlist_node *node; unsigned int h; if (n < RT_TABLE_MIN) @@ -839,7 +837,7 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) h = n & (DN_FIB_TABLE_HASHSZ - 1); rcu_read_lock(); - hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) { + hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) { if (t->n == n) { rcu_read_unlock(); return t; @@ -885,11 +883,10 @@ void dn_fib_flush(void) { int flushed = 0; struct dn_fib_table *tb; - struct hlist_node *node; unsigned int h; for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) + hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) flushed += tb->flush(tb); } @@ -908,12 +905,12 @@ void __init dn_fib_table_init(void) void __exit dn_fib_table_cleanup(void) { struct dn_fib_table *t; - struct hlist_node *node, *next; + struct hlist_node *next; unsigned int h; write_lock(&dn_fib_tables_lock); for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_safe(t, 
node, next, &dn_fib_table_hash[h], + hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h], hlist) { hlist_del(&t->hlist); kfree(t); diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig index 2f81de5..8d7c109 100644 --- a/net/decnet/netfilter/Kconfig +++ b/net/decnet/netfilter/Kconfig @@ -3,7 +3,7 @@ # menu "DECnet: Netfilter Configuration" - depends on DECNET && NETFILTER && EXPERIMENTAL + depends on DECNET && NETFILTER depends on NETFILTER_ADVANCED config DECNET_NF_GRABULATOR diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 45295ca..2bc62ea 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -80,6 +80,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, int ret; char *name; int i; + bool valid_name_found = false; /* * Probe for switch model. @@ -131,8 +132,13 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, } else { ds->phys_port_mask |= 1 << i; } + valid_name_found = true; } + if (!valid_name_found && i == DSA_MAX_PORTS) { + ret = -EINVAL; + goto out; + } /* * If the CPU connects to this switch, set the switch tree diff --git a/net/dsa/slave.c b/net/dsa/slave.c index e32083d..6ebd8fb 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -41,8 +41,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) ds->slave_mii_bus->name = "dsa slave smi"; ds->slave_mii_bus->read = dsa_slave_phy_read; ds->slave_mii_bus->write = dsa_slave_phy_write; - snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", - ds->master_mii_bus->id, ds->pd->sw_addr); + snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x", + ds->index, ds->pd->sw_addr); ds->slave_mii_bus->parent = &ds->master_mii_bus->dev; } @@ -203,10 +203,10 @@ dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) static void dsa_slave_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { - strncpy(drvinfo->driver, "dsa", 32); - strncpy(drvinfo->version, dsa_driver_version, 32); - strncpy(drvinfo->fw_version, "N/A", 32); - 
strncpy(drvinfo->bus_info, "platform", 32); + strlcpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, dsa_driver_version, sizeof(drvinfo->version)); + strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); + strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); } static int dsa_slave_nway_reset(struct net_device *dev) @@ -391,7 +391,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, if (p->phy != NULL) { phy_attach(slave_dev, dev_name(&p->phy->dev), - 0, PHY_INTERFACE_MODE_GMII); + PHY_INTERFACE_MODE_GMII); p->phy->autoneg = AUTONEG_ENABLE; p->phy->speed = 0; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 4efad53..a36c85ea 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -272,6 +272,36 @@ void eth_header_cache_update(struct hh_cache *hh, EXPORT_SYMBOL(eth_header_cache_update); /** + * eth_prepare_mac_addr_change - prepare for mac change + * @dev: network device + * @p: socket address + */ +int eth_prepare_mac_addr_change(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) + return -EBUSY; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + return 0; +} +EXPORT_SYMBOL(eth_prepare_mac_addr_change); + +/** + * eth_commit_mac_addr_change - commit mac change + * @dev: network device + * @p: socket address + */ +void eth_commit_mac_addr_change(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); +} +EXPORT_SYMBOL(eth_commit_mac_addr_change); + +/** * eth_mac_addr - set new Ethernet hardware address * @dev: network device * @p: socket address @@ -283,15 +313,12 @@ EXPORT_SYMBOL(eth_header_cache_update); */ int eth_mac_addr(struct net_device *dev, void *p) { - struct sockaddr *addr = p; + int ret; - if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) - return -EBUSY; - if 
(!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); - /* if device marked as NET_ADDR_RANDOM, reset it */ - dev->addr_assign_type &= ~NET_ADDR_RANDOM; + ret = eth_prepare_mac_addr_change(dev, p); + if (ret < 0) + return ret; + eth_commit_mac_addr_change(dev, p); return 0; } EXPORT_SYMBOL(eth_mac_addr); diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index f651da6..43b95ca 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -377,17 +377,14 @@ static int lowpan_header_create(struct sk_buff *skb, struct ipv6hdr *hdr; const u8 *saddr = _saddr; const u8 *daddr = _daddr; - u8 *head; + u8 head[100]; struct ieee802154_addr sa, da; + /* TODO: + * if this package isn't ipv6 one, where should it be routed? + */ if (type != ETH_P_IPV6) return 0; - /* TODO: - * if this package isn't ipv6 one, where should it be routed? - */ - head = kzalloc(100, GFP_KERNEL); - if (head == NULL) - return -ENOMEM; hdr = ipv6_hdr(skb); hc06_ptr = head + 2; @@ -561,8 +558,6 @@ static int lowpan_header_create(struct sk_buff *skb, skb_pull(skb, sizeof(struct ipv6hdr)); memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); - kfree(head); - lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); @@ -594,10 +589,32 @@ static int lowpan_header_create(struct sk_buff *skb, } } +static int lowpan_give_skb_to_devices(struct sk_buff *skb) +{ + struct lowpan_dev_record *entry; + struct sk_buff *skb_cp; + int stat = NET_RX_SUCCESS; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) { + stat = -ENOMEM; + break; + } + + skb_cp->dev = entry->ldev; + stat = netif_rx(skb_cp); + } + rcu_read_unlock(); + + return stat; +} + static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) { struct sk_buff *new; - struct lowpan_dev_record *entry; int 
stat = NET_RX_SUCCESS; new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), @@ -614,19 +631,7 @@ static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) new->protocol = htons(ETH_P_IPV6); new->pkt_type = PACKET_HOST; - rcu_read_lock(); - list_for_each_entry_rcu(entry, &lowpan_devices, list) - if (lowpan_dev_info(entry->ldev)->real_dev == new->dev) { - skb = skb_copy(new, GFP_ATOMIC); - if (!skb) { - stat = -ENOMEM; - break; - } - - skb->dev = entry->ldev; - stat = netif_rx(skb); - } - rcu_read_unlock(); + stat = lowpan_give_skb_to_devices(new); kfree_skb(new); @@ -1137,19 +1142,42 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, goto drop; /* check that it's our buffer */ - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - local_skb = skb_clone(skb, GFP_ATOMIC); + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Copy the packet so that the IPv6 header is + * properly aligned. + */ + local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1, + skb_tailroom(skb), GFP_ATOMIC); if (!local_skb) goto drop; - lowpan_process_data(local_skb); + local_skb->protocol = htons(ETH_P_IPV6); + local_skb->pkt_type = PACKET_HOST; + + /* Pull off the 1-byte of 6lowpan header. 
*/ + skb_pull(local_skb, 1); + skb_reset_network_header(local_skb); + skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); + + lowpan_give_skb_to_devices(local_skb); + + kfree_skb(local_skb); kfree_skb(skb); - break; - default: - break; + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ + local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + goto drop; + lowpan_process_data(local_skb); + + kfree_skb(skb); + break; + default: + break; + } } return NET_RX_SUCCESS; @@ -1234,7 +1262,7 @@ static inline int __init lowpan_netlink_init(void) return rtnl_link_register(&lowpan_link_ops); } -static inline void __init lowpan_netlink_fini(void) +static inline void lowpan_netlink_fini(void) { rtnl_link_unregister(&lowpan_link_ops); } diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h index 8c2251f..bba5f83 100644 --- a/net/ieee802154/6lowpan.h +++ b/net/ieee802154/6lowpan.h @@ -84,7 +84,7 @@ (memcmp(addr1, addr2, length >> 3) == 0) /* local link, i.e. FE80::/10 */ -#define is_addr_link_local(a) (((a)->s6_addr16[0]) == 0x80FE) +#define is_addr_link_local(a) (((a)->s6_addr16[0]) == htons(0xFE80)) /* * check whether we can compress the IID to 16 bits, diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 7dee650..b2e06df 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -1,6 +1,5 @@ config IEEE802154 - tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support" ---help--- IEEE Std 802.15.4 defines a low data rate, low power and low complexity short range wireless personal area networks. 
It was diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 1670561..e0da175 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -350,7 +350,6 @@ static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id, int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk, *prev = NULL; - struct hlist_node *node; int ret = NET_RX_SUCCESS; u16 pan_id, short_addr; @@ -361,7 +360,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); read_lock(&dgram_lock); - sk_for_each(sk, node, &dgram_head) { + sk_for_each(sk, &dgram_head) { if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr, dgram_sk(sk))) { if (prev) { diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 50e8239..41f538b 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -221,10 +221,9 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; read_lock(&raw_lock); - sk_for_each(sk, node, &raw_head) { + sk_for_each(sk, &raw_head) { bh_lock_sock(sk); if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) { diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 1627ef2..13571ea 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -91,7 +91,7 @@ static struct class wpan_phy_class = { static DEFINE_MUTEX(wpan_phy_mutex); static int wpan_phy_idx; -static int wpan_phy_match(struct device *dev, void *data) +static int wpan_phy_match(struct device *dev, const void *data) { return !strcmp(dev_name(dev), (const char *)data); } @@ -103,8 +103,7 @@ struct wpan_phy *wpan_phy_find(const char *str) if (WARN_ON(!str)) return NULL; - dev = class_find_device(&wpan_phy_class, NULL, - (void *)str, wpan_phy_match); + dev = class_find_device(&wpan_phy_class, NULL, str, 
wpan_phy_match); if (!dev) return NULL; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5a19aeb..7944df7 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -488,7 +488,6 @@ config TCP_CONG_HTCP config TCP_CONG_HSTCP tristate "High Speed TCP" - depends on EXPERIMENTAL default n ---help--- Sally Floyd's High Speed TCP (RFC 3649) congestion control. @@ -499,7 +498,6 @@ config TCP_CONG_HSTCP config TCP_CONG_HYBLA tristate "TCP-Hybla congestion control algorithm" - depends on EXPERIMENTAL default n ---help--- TCP-Hybla is a sender-side only change that eliminates penalization of @@ -509,7 +507,6 @@ config TCP_CONG_HYBLA config TCP_CONG_VEGAS tristate "TCP Vegas" - depends on EXPERIMENTAL default n ---help--- TCP Vegas is a sender-side only change to TCP that anticipates @@ -520,7 +517,6 @@ config TCP_CONG_VEGAS config TCP_CONG_SCALABLE tristate "Scalable TCP" - depends on EXPERIMENTAL default n ---help--- Scalable TCP is a sender-side only change to TCP which uses a @@ -530,7 +526,6 @@ config TCP_CONG_SCALABLE config TCP_CONG_LP tristate "TCP Low Priority" - depends on EXPERIMENTAL default n ---help--- TCP Low Priority (TCP-LP), a distributed algorithm whose goal is @@ -540,7 +535,6 @@ config TCP_CONG_LP config TCP_CONG_VENO tristate "TCP Veno" - depends on EXPERIMENTAL default n ---help--- TCP Veno is a sender-side only enhancement of TCP to obtain better @@ -552,7 +546,6 @@ config TCP_CONG_VENO config TCP_CONG_YEAH tristate "YeAH TCP" - depends on EXPERIMENTAL select TCP_CONG_VEGAS default n ---help--- @@ -567,7 +560,6 @@ config TCP_CONG_YEAH config TCP_CONG_ILLINOIS tristate "TCP Illinois" - depends on EXPERIMENTAL default n ---help--- TCP-Illinois is a sender-side modification of TCP Reno for @@ -631,8 +623,7 @@ config DEFAULT_TCP_CONG default "cubic" config TCP_MD5SIG - bool "TCP: MD5 Signature Option support (RFC2385) (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO select CRYPTO_MD5 
---help--- diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 24b384b..c929d9c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -248,8 +248,12 @@ EXPORT_SYMBOL(inet_listen); u32 inet_ehash_secret __read_mostly; EXPORT_SYMBOL(inet_ehash_secret); +u32 ipv6_hash_secret __read_mostly; +EXPORT_SYMBOL(ipv6_hash_secret); + /* - * inet_ehash_secret must be set exactly once + * inet_ehash_secret must be set exactly once, and to a non nul value + * ipv6_hash_secret must be set exactly once. */ void build_ehash_secret(void) { @@ -259,25 +263,11 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - cmpxchg(&inet_ehash_secret, 0, rnd); + if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) + get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); } EXPORT_SYMBOL(build_ehash_secret); -static inline int inet_netns_ok(struct net *net, __u8 protocol) -{ - const struct net_protocol *ipprot; - - if (net_eq(net, &init_net)) - return 1; - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot == NULL) { - /* raw IP is OK */ - return 1; - } - return ipprot->netns_ok; -} - /* * Create an inet socket. 
*/ @@ -350,10 +340,6 @@ lookup_protocol: !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; - err = -EAFNOSUPPORT; - if (!inet_netns_ok(net, protocol)) - goto out_rcu_unlock; - sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; @@ -1306,6 +1292,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | 0))) goto out; @@ -1333,7 +1320,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ops->callbacks.gso_segment(skb, features); rcu_read_unlock(); - if (!segs || IS_ERR(segs)) + if (IS_ERR_OR_NULL(segs)) goto out; skb = segs; @@ -1345,8 +1332,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (skb->next != NULL) iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); - } else + } else { iph->id = htons(id++); + } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); @@ -1590,7 +1578,7 @@ static const struct net_offload udp_offload = { static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, - .err_handler = ping_err, + .err_handler = icmp_err, .no_policy = 1, .netns_ok = 1, }; @@ -1705,12 +1693,11 @@ static struct packet_type ip_packet_type __read_mostly = { static int __init inet_init(void) { - struct sk_buff *dummy_skb; struct inet_protosw *q; struct list_head *r; int rc = -EINVAL; - BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); if (!sysctl_local_reserved_ports) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a0d8392..2e7f194 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -269,7 +269,11 @@ static void ah_input_done(struct crypto_async_request *base, int err) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, 
ihl); __skb_pull(skb, ah_hlen + ihl); - skb_set_transport_header(skb, -ihl); + + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -317,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; @@ -381,7 +384,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); - skb_set_transport_header(skb, -ihl); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); err = nexthdr; @@ -413,9 +419,12 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ce6fbdf..fea4929 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -321,7 +321,7 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) { __be32 saddr = 0; - u8 *dst_ha = NULL; + u8 dst_ha[MAX_ADDR_LEN], *dst_hw = NULL; struct net_device *dev = neigh->dev; __be32 target = *(__be32 *)neigh->primary_key; int probes = atomic_read(&neigh->probes); @@ -363,8 +363,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) if (probes < 0) { if (!(neigh->nud_state & NUD_VALID)) pr_debug("trying to 
ucast probe in NUD_INVALID\n"); - dst_ha = neigh->ha; - read_lock_bh(&neigh->lock); + neigh_ha_snapshot(dst_ha, neigh, dev); + dst_hw = dst_ha; } else { probes -= neigh->parms->app_probes; if (probes < 0) { @@ -376,9 +376,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) } arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, - dst_ha, dev->dev_addr, NULL); - if (dst_ha) - read_unlock_bh(&neigh->lock); + dst_hw, dev->dev_addr, NULL); } static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) @@ -930,24 +928,25 @@ static void parp_redo(struct sk_buff *skb) static int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct arphdr *arp; + const struct arphdr *arp; + + if (dev->flags & IFF_NOARP || + skb->pkt_type == PACKET_OTHERHOST || + skb->pkt_type == PACKET_LOOPBACK) + goto freeskb; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out_of_mem; /* ARP header, plus 2 device addresses, plus 2 IP addresses. 
*/ if (!pskb_may_pull(skb, arp_hdr_len(dev))) goto freeskb; arp = arp_hdr(skb); - if (arp->ar_hln != dev->addr_len || - dev->flags & IFF_NOARP || - skb->pkt_type == PACKET_OTHERHOST || - skb->pkt_type == PACKET_LOOPBACK || - arp->ar_pln != 4) + if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4) goto freeskb; - skb = skb_share_check(skb, GFP_ATOMIC); - if (skb == NULL) - goto out_of_mem; - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); @@ -1406,14 +1405,14 @@ static const struct file_operations arp_seq_fops = { static int __net_init arp_net_init(struct net *net) { - if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops)) + if (!proc_create("arp", S_IRUGO, net->proc_net, &arp_seq_fops)) return -ENOMEM; return 0; } static void __net_exit arp_net_exit(struct net *net) { - proc_net_remove(net, "arp"); + remove_proc_entry("arp", net->proc_net); } static struct pernet_operations arp_net_ops = { diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 424fafb..b28e863 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -85,3 +85,28 @@ out: return err; } EXPORT_SYMBOL(ip4_datagram_connect); + +void ip4_datagram_release_cb(struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + struct flowi4 fl4; + struct rtable *rt; + + if (! 
__sk_dst_get(sk) || __sk_dst_check(sk, 0)) + return; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_protocol, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + if (!IS_ERR(rt)) + __sk_dst_set(sk, &rt->dst); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(ip4_datagram_release_cb); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cc06a47..c6287cd 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -63,6 +63,7 @@ #include <net/ip_fib.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include <net/addrconf.h> #include "fib_lookup.h" @@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, }; #define IN4_ADDR_HSIZE_SHIFT 8 @@ -137,10 +139,9 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; - struct hlist_node *node; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { if (ifa->ifa_local == addr) { struct net_device *dev = ifa->ifa_dev->dev; @@ -417,6 +418,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } +static void check_lifetime(struct work_struct *work); + +static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); + static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -462,6 +467,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, 
inet_hash_insert(dev_net(in_dev->dev), ifa); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); + /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ @@ -573,7 +581,131 @@ errout: return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) +#define INFINITY_LIFE_TIME 0xFFFFFFFF + +static void check_lifetime(struct work_struct *work) +{ + unsigned long now, next, next_sec, next_sched; + struct in_ifaddr *ifa; + struct hlist_node *n; + int i; + + now = jiffies; + next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); + + for (i = 0; i < IN4_ADDR_HSIZE; i++) { + bool change_needed = false; + + rcu_read_lock(); + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + change_needed = true; + } else if (ifa->ifa_preferred_lft == + INFINITY_LIFE_TIME) { + continue; + } else if (age >= ifa->ifa_preferred_lft) { + if (time_before(ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ, next)) + next = ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ; + + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) + change_needed = true; + } else if (time_before(ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ, + next)) { + next = ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ; + } + } + rcu_read_unlock(); + if (!change_needed) + continue; + rtnl_lock(); + hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. 
*/ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap; + + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &(*ifap)->ifa_next) { + if (*ifap == ifa) { + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + break; + } + } + } else if (ifa->ifa_preferred_lft != + INFINITY_LIFE_TIME && + age >= ifa->ifa_preferred_lft && + !(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } + rtnl_unlock(); + } + + next_sec = round_jiffies_up(next); + next_sched = next; + + /* If rounded timeout is accurate enough, accept it. */ + if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) + next_sched = next_sec; + + now = jiffies; + /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ + if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) + next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; + + schedule_delayed_work(&check_lifetime_work, next_sched - now); +} + +static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, + __u32 prefered_lft) +{ + unsigned long timeout; + + ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); + + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) + ifa->ifa_valid_lft = timeout; + else + ifa->ifa_flags |= IFA_F_PERMANENT; + + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa->ifa_flags |= IFA_F_DEPRECATED; + ifa->ifa_preferred_lft = timeout; + } + ifa->ifa_tstamp = jiffies; + if (!ifa->ifa_cstamp) + ifa->ifa_cstamp = ifa->ifa_tstamp; +} + +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, + __u32 *pvalid_lft, __u32 *pprefered_lft) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -633,24 +765,77 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) else memcpy(ifa->ifa_label, 
dev->name, IFNAMSIZ); + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { + err = -EINVAL; + goto errout; + } + *pvalid_lft = ci->ifa_valid; + *pprefered_lft = ci->ifa_prefered; + } + return ifa; errout: return ERR_PTR(err); } +static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct in_ifaddr *ifa1, **ifap; + + if (!ifa->ifa_local) + return NULL; + + for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; + ifap = &ifa1->ifa_next) { + if (ifa1->ifa_mask == ifa->ifa_mask && + inet_ifa_match(ifa1->ifa_address, ifa) && + ifa1->ifa_local == ifa->ifa_local) + return ifa1; + } + return NULL; +} + static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; + struct in_ifaddr *ifa_existing; + __u32 valid_lft = INFINITY_LIFE_TIME; + __u32 prefered_lft = INFINITY_LIFE_TIME; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(net, nlh); + ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft); if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + ifa_existing = find_matching_ifa(ifa); + if (!ifa_existing) { + /* It would be best to check for !NLM_F_CREATE here but + * userspace alreay relies on not having to provide this. 
+ */ + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + } else { + inet_free_ifa(ifa); + + if (nlh->nlmsg_flags & NLM_F_EXCL || + !(nlh->nlmsg_flags & NLM_F_REPLACE)) + return -EEXIST; + ifa = ifa_existing; + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); + blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); + } + return 0; } /* @@ -823,9 +1008,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) { ret = -ENOBUFS; ifa = inet_alloc_ifa(); - INIT_HLIST_NODE(&ifa->hash); if (!ifa) break; + INIT_HLIST_NODE(&ifa->hash); if (colon) memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); else @@ -852,6 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifa->ifa_prefixlen = 32; ifa->ifa_mask = inet_make_mask(32); } + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ret = inet_set_ifa(dev, ifa); break; @@ -1190,6 +1376,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ifa->ifa_dev = in_dev; ifa->ifa_scope = RT_SCOPE_HOST; memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, + INFINITY_LIFE_TIME); inet_insert_ifa(ifa); } } @@ -1246,11 +1434,30 @@ static size_t inet_nlmsg_size(void) + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } +static inline u32 cstamp_delta(unsigned long cstamp) +{ + return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; +} + +static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, + unsigned long tstamp, u32 preferred, u32 valid) +{ + struct ifa_cacheinfo ci; + + ci.cstamp = cstamp_delta(cstamp); + ci.tstamp = cstamp_delta(tstamp); + ci.ifa_prefered = preferred; + ci.ifa_valid = valid; + + return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); +} + static int inet_fill_ifaddr(struct sk_buff 
*skb, struct in_ifaddr *ifa, u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) @@ -1259,10 +1466,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; + ifm->ifa_flags = ifa->ifa_flags; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { + preferred = ifa->ifa_preferred_lft; + valid = ifa->ifa_valid_lft; + if (preferred != INFINITY_LIFE_TIME) { + long tval = (jiffies - ifa->ifa_tstamp) / HZ; + + if (preferred > tval) + preferred -= tval; + else + preferred = 0; + if (valid != INFINITY_LIFE_TIME) { + if (valid > tval) + valid -= tval; + else + valid = 0; + } + } + } else { + preferred = INFINITY_LIFE_TIME; + valid = INFINITY_LIFE_TIME; + } if ((ifa->ifa_address && nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && @@ -1270,7 +1498,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, (ifa->ifa_broadcast && nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) + nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, + preferred, valid)) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1290,7 +1520,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) struct in_device *in_dev; struct in_ifaddr *ifa; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -1300,7 +1529,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - hlist_for_each_entry_rcu(dev, 
node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (h > s_h || idx > s_idx) @@ -1988,6 +2217,8 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + schedule_delayed_work(&check_lifetime_work, 0); + rtnl_af_register(&inet_af_ops); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b61e9de..4cfe34d 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -139,8 +139,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ - err = -ENOMEM; - esp = x->data; aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -176,8 +174,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); @@ -346,7 +346,10 @@ static int esp_input_done2(struct sk_buff *skb, int err) pskb_trim(skb, skb->len - alen - padlen - 2); __skb_pull(skb, hlen); - skb_set_transport_header(skb, -ihl); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); err = nexthdr[1]; @@ -499,9 +502,12 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5cd75e2..eb4bb12 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -112,7 +112,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id) struct fib_table 
*fib_get_table(struct net *net, u32 id) { struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; unsigned int h; @@ -122,7 +121,7 @@ struct fib_table *fib_get_table(struct net *net, u32 id) rcu_read_lock(); head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { if (tb->tb_id == id) { rcu_read_unlock(); return tb; @@ -137,13 +136,12 @@ static void fib_flush(struct net *net) { int flushed = 0; struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb_hlist) + hlist_for_each_entry(tb, head, tb_hlist) flushed += fib_table_flush(tb); } @@ -656,7 +654,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; int dumped = 0; @@ -670,7 +667,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb_hlist) { + hlist_for_each_entry(tb, head, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -974,7 +971,7 @@ static void nl_fib_input(struct sk_buff *skb) nl_fib_lookup(frn, tb); - portid = NETLINK_CB(skb).portid; /* pid of sending process */ + portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); @@ -1117,11 +1114,11 @@ static void ip_fib_net_exit(struct net *net) for (i = 0; i < FIB_TABLE_HASHSZ; i++) { struct fib_table *tb; struct hlist_head *head; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; head = &net->ipv4.fib_table_hash[i]; - hlist_for_each_entry_safe(tb, 
node, tmp, head, tb_hlist) { - hlist_del(node); + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { + hlist_del(&tb->tb_hlist); fib_table_flush(tb); fib_free_table(tb); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4797a80..8f6cb7a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -298,14 +298,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) static struct fib_info *fib_find_info(const struct fib_info *nfi) { struct hlist_head *head; - struct hlist_node *node; struct fib_info *fi; unsigned int hash; hash = fib_info_hashfn(nfi); head = &fib_info_hash[hash]; - hlist_for_each_entry(fi, node, head, fib_hash) { + hlist_for_each_entry(fi, head, fib_hash) { if (!net_eq(fi->fib_net, nfi->fib_net)) continue; if (fi->fib_nhs != nfi->fib_nhs) @@ -331,7 +330,6 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; - struct hlist_node *node; struct fib_nh *nh; unsigned int hash; @@ -339,7 +337,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { if (nh->nh_dev == dev && nh->nh_gw == gw && !(nh->nh_flags & RTNH_F_DEAD)) { @@ -721,10 +719,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; - struct hlist_node *node, *n; + struct hlist_node *n; struct fib_info *fi; - hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { + hlist_for_each_entry_safe(fi, n, head, fib_hash) { struct hlist_head *dest; unsigned int new_hash; @@ -739,10 +737,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, for (i = 0; i < old_size; i++) { struct hlist_head *lhead = &fib_info_laddrhash[i]; - struct hlist_node *node, *n; + struct 
hlist_node *n; struct fib_info *fi; - hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { + hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { struct hlist_head *ldest; unsigned int new_hash; @@ -1096,13 +1094,12 @@ int fib_sync_down_addr(struct net *net, __be32 local) int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; - struct hlist_node *node; struct fib_info *fi; if (fib_info_laddrhash == NULL || local == 0) return 0; - hlist_for_each_entry(fi, node, head, fib_lhash) { + hlist_for_each_entry(fi, head, fib_lhash) { if (!net_eq(fi->fib_net, net)) continue; if (fi->fib_prefsrc == local) { @@ -1120,13 +1117,12 @@ int fib_sync_down_dev(struct net_device *dev, int force) struct fib_info *prev_fi = NULL; unsigned int hash = fib_devindex_hashfn(dev->ifindex); struct hlist_head *head = &fib_info_devhash[hash]; - struct hlist_node *node; struct fib_nh *nh; if (force) scope = -1; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int dead; @@ -1232,7 +1228,6 @@ int fib_sync_up(struct net_device *dev) struct fib_info *prev_fi; unsigned int hash; struct hlist_head *head; - struct hlist_node *node; struct fib_nh *nh; int ret; @@ -1244,7 +1239,7 @@ int fib_sync_up(struct net_device *dev) head = &fib_info_devhash[hash]; ret = 0; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int alive; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 31d771c..ff06b75 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -920,10 +920,9 @@ nomem: static struct leaf_info *find_leaf_info(struct leaf *l, int plen) { struct hlist_head *head = &l->list; - struct hlist_node *node; struct leaf_info *li; - hlist_for_each_entry_rcu(li, node, head, hlist) + hlist_for_each_entry_rcu(li, head, hlist) if (li->plen == plen) return li; @@ -943,12 +942,11 @@ 
static inline struct list_head *get_fa_head(struct leaf *l, int plen) static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) { struct leaf_info *li = NULL, *last = NULL; - struct hlist_node *node; if (hlist_empty(head)) { hlist_add_head_rcu(&new->hlist, head); } else { - hlist_for_each_entry(li, node, head, hlist) { + hlist_for_each_entry(li, head, hlist) { if (new->plen > li->plen) break; @@ -1354,9 +1352,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, { struct leaf_info *li; struct hlist_head *hhead = &l->list; - struct hlist_node *node; - hlist_for_each_entry_rcu(li, node, hhead, hlist) { + hlist_for_each_entry_rcu(li, hhead, hlist) { struct fib_alias *fa; if (l->key != (key & li->mask_plen)) @@ -1740,10 +1737,10 @@ static int trie_flush_leaf(struct leaf *l) { int found = 0; struct hlist_head *lih = &l->list; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct leaf_info *li = NULL; - hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { + hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += trie_flush_list(&li->falh); if (list_empty(&li->falh)) { @@ -1895,14 +1892,13 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { struct leaf_info *li; - struct hlist_node *node; int i, s_i; s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { if (i < s_i) { i++; continue; @@ -2092,14 +2088,13 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) if (IS_LEAF(n)) { struct leaf *l = (struct leaf *)n; struct leaf_info *li; - struct hlist_node *tmp; s->leaves++; s->totdepth += iter.depth; if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, tmp, &l->list, hlist) + hlist_for_each_entry_rcu(li, &l->list, hlist) ++s->prefixes; } else { const struct tnode *tn = (const struct 
tnode *) n; @@ -2200,10 +2195,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - struct hlist_node *node; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct trie *t = (struct trie *) tb->tb_data; struct trie_stat stat; @@ -2245,10 +2239,9 @@ static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - struct hlist_node *node; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct rt_trie_node *n; for (n = fib_trie_get_first(iter, @@ -2298,7 +2291,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* new hash chain */ while (++h < FIB_TABLE_HASHSZ) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); if (n) goto found; @@ -2381,13 +2374,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) } else { struct leaf *l = (struct leaf *) n; struct leaf_info *li; - struct hlist_node *node; __be32 val = htonl(l->key); seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2532,7 +2524,6 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) { struct leaf *l = v; struct leaf_info *li; - struct hlist_node *node; if (v == SEQ_START_TOKEN) { seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " @@ -2541,7 +2532,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) 
return 0; } - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { struct fib_alias *fa; __be32 mask, prefix; @@ -2607,31 +2598,31 @@ static const struct file_operations fib_route_fops = { int __net_init fib_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) + if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) goto out1; - if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, - &fib_triestat_fops)) + if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, + &fib_triestat_fops)) goto out2; - if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) + if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) goto out3; return 0; out3: - proc_net_remove(net, "fib_triestat"); + remove_proc_entry("fib_triestat", net->proc_net); out2: - proc_net_remove(net, "fib_trie"); + remove_proc_entry("fib_trie", net->proc_net); out1: return -ENOMEM; } void __net_exit fib_proc_exit(struct net *net) { - proc_net_remove(net, "fib_trie"); - proc_net_remove(net, "fib_triestat"); - proc_net_remove(net, "route"); + remove_proc_entry("fib_trie", net->proc_net); + remove_proc_entry("fib_triestat", net->proc_net); + remove_proc_entry("route", net->proc_net); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 42a4910..7a4c710 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -19,6 +19,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netdevice.h> +#include <linux/if_tunnel.h> #include <linux/spinlock.h> #include <net/protocol.h> #include <net/gre.h> @@ -26,6 +27,11 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { @@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, 
u32 info) rcu_read_unlock(); } +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = skb->mac_len; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + if (greh->protocol == htons(ETH_P_TEB)) { + struct ethhdr *eth = eth_hdr(skb); + skb->protocol = eth->h_proto; + } else { + skb->protocol = greh->protocol; + } + + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. 
*/ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + } while ((skb = skb->next)); +out: + return segs; +} + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + static const struct net_protocol net_gre_protocol = { .handler = gre_rcv, .err_handler = gre_err, .netns_ok = 1, }; +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); @@ -127,11 +238,18 @@ static int __init gre_init(void) return -EAGAIN; } + if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { + pr_err("can't add protocol offload\n"); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); + return -EAGAIN; + } + return 0; } static void __exit gre_exit(void) { + inet_del_offload(&gre_offload, IPPROTO_GRE); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 17ff9fd..3ac5dff 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -934,6 +934,29 @@ error: goto drop; } +void icmp_err(struct sk_buff *skb, u32 info) +{ + struct iphdr *iph = (struct iphdr *)skb->data; + struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int type = 
icmp_hdr(skb)->type; + int code = icmp_hdr(skb)->code; + struct net *net = dev_net(skb->dev); + + /* + * Use ping_err to handle all icmp errors except those + * triggered by ICMP_ECHOREPLY which sent from kernel. + */ + if (icmph->type != ICMP_ECHOREPLY) { + ping_err(skb, info); + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ICMP, 0); + else if (type == ICMP_REDIRECT) + ipv4_redirect(skb, net, 0, 0, IPPROTO_ICMP, 0); +} + /* * This table is the definition of how we handle ICMP. */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 736ab70..d8c2327 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2646,24 +2646,25 @@ static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops); + pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); if (!pde) goto out_igmp; - pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + pde = proc_create("mcfilter", S_IRUGO, net->proc_net, + &igmp_mcf_seq_fops); if (!pde) goto out_mcfilter; return 0; out_mcfilter: - proc_net_remove(net, "igmp"); + remove_proc_entry("igmp", net->proc_net); out_igmp: return -ENOMEM; } static void __net_exit igmp_net_exit(struct net *net) { - proc_net_remove(net, "mcfilter"); - proc_net_remove(net, "igmp"); + remove_proc_entry("mcfilter", net->proc_net); + remove_proc_entry("igmp", net->proc_net); } static struct pernet_operations igmp_net_ops = { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d0670f0..786d97a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -57,8 +57,9 @@ int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) { struct sock *sk2; - struct hlist_node *node; int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + kuid_t uid = sock_i_uid((struct sock *)sk); /* * 
Unlike other sk lookup places we do not check @@ -67,14 +68,17 @@ int inet_csk_bind_conflict(const struct sock *sk, * one this bucket belongs to. */ - sk_for_each_bound(sk2, node, &tb->owners) { + sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if (!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) { + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2))))) { const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || sk2_rcv_saddr == sk_rcv_saddr(sk)) @@ -90,7 +94,7 @@ int inet_csk_bind_conflict(const struct sock *sk, } } } - return node != NULL; + return sk2 != NULL; } EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); @@ -101,11 +105,11 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_bind_hashbucket *head; - struct hlist_node *node; struct inet_bind_bucket *tb; int ret, attempts = 5; struct net *net = sock_net(sk); int smallest_size = -1, smallest_rover; + kuid_t uid = sock_i_uid(sk); local_bh_disable(); if (!snum) { @@ -123,11 +127,14 @@ again: head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == rover) { - if (tb->fastreuse > 0 && - sk->sk_reuse && - sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && + sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && + uid_eq(tb->fastuid, uid))) && (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; @@ -174,7 +181,7 @@ have_snum: head = 
&hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == snum) goto tb_found; } @@ -185,14 +192,18 @@ tb_found: if (sk->sk_reuse == SK_FORCE_REUSE) goto success; - if (tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size == -1) { goto success; } else { ret = 1; if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size != -1 && --attempts >= 0) { spin_unlock(&head->lock); goto again; @@ -212,9 +223,19 @@ tb_not_found: tb->fastreuse = 1; else tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; + if (sk->sk_reuseport) { + tb->fastreuseport = 1; + tb->fastuid = uid; + } else + tb->fastreuseport = 0; + } else { + if (tb->fastreuse && + (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) + tb->fastreuse = 0; + if (tb->fastreuseport && + (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) + tb->fastreuseport = 0; + } success: if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); @@ -714,6 +735,7 @@ EXPORT_SYMBOL(inet_csk_destroy_sock); * tcp/dccp_create_openreq_child(). 
*/ void inet_csk_prepare_forced_close(struct sock *sk) + __releases(&sk->sk_lock.slock) { /* sk_clone_lock locked the socket and set refcnt to 2 */ bh_unlock_sock(sk); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4750d2b..f4fd23d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -21,6 +21,7 @@ #include <linux/rtnetlink.h> #include <linux/slab.h> +#include <net/sock.h> #include <net/inet_frag.h> static void inet_frag_secret_rebuild(unsigned long dummy) @@ -33,9 +34,9 @@ static void inet_frag_secret_rebuild(unsigned long dummy) get_random_bytes(&f->rnd, sizeof(u32)); for (i = 0; i < INETFRAGS_HASHSZ; i++) { struct inet_frag_queue *q; - struct hlist_node *p, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) { + hlist_for_each_entry_safe(q, n, &f->hash[i], list) { unsigned int hval = f->hashfn(q); if (hval != i) { @@ -73,8 +74,9 @@ EXPORT_SYMBOL(inet_frags_init); void inet_frags_init_net(struct netns_frags *nf) { nf->nqueues = 0; - atomic_set(&nf->mem, 0); + init_frag_mem_limit(nf); INIT_LIST_HEAD(&nf->lru_list); + spin_lock_init(&nf->lru_lock); } EXPORT_SYMBOL(inet_frags_init_net); @@ -91,6 +93,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) local_bh_disable(); inet_frag_evictor(nf, f, true); local_bh_enable(); + + percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); @@ -98,9 +102,9 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { write_lock(&f->lock); hlist_del(&fq->list); - list_del(&fq->lru_list); fq->net->nqueues--; write_unlock(&f->lock); + inet_frag_lru_del(fq); } void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) @@ -117,12 +121,8 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) EXPORT_SYMBOL(inet_frag_kill); static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, - struct sk_buff *skb, int *work) + struct sk_buff *skb) { - if (work) 
- *work -= skb->truesize; - - atomic_sub(skb->truesize, &nf->mem); if (f->skb_free) f->skb_free(skb); kfree_skb(skb); @@ -133,6 +133,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, { struct sk_buff *fp; struct netns_frags *nf; + unsigned int sum, sum_truesize = 0; WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); WARN_ON(del_timer(&q->timer) != 0); @@ -143,13 +144,14 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(nf, f, fp, work); + sum_truesize += fp->truesize; + frag_kfree_skb(nf, f, fp); fp = xp; } - + sum = sum_truesize + f->qsize; if (work) - *work -= f->qsize; - atomic_sub(f->qsize, &nf->mem); + *work -= sum; + sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); @@ -164,22 +166,23 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) int work, evicted = 0; if (!force) { - if (atomic_read(&nf->mem) <= nf->high_thresh) + if (frag_mem_limit(nf) <= nf->high_thresh) return 0; } - work = atomic_read(&nf->mem) - nf->low_thresh; + work = frag_mem_limit(nf) - nf->low_thresh; while (work > 0) { - read_lock(&f->lock); + spin_lock(&nf->lru_lock); + if (list_empty(&nf->lru_list)) { - read_unlock(&f->lock); + spin_unlock(&nf->lru_lock); break; } q = list_first_entry(&nf->lru_list, struct inet_frag_queue, lru_list); atomic_inc(&q->refcnt); - read_unlock(&f->lock); + spin_unlock(&nf->lru_lock); spin_lock(&q->lock); if (!(q->last_in & INET_FRAG_COMPLETE)) @@ -201,7 +204,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, { struct inet_frag_queue *qp; #ifdef CONFIG_SMP - struct hlist_node *n; #endif unsigned int hash; @@ -217,7 +219,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, * such entry could be created on other cpu, while we * promoted read lock to write lock. 
*/ - hlist_for_each_entry(qp, n, &f->hash[hash], list) { + hlist_for_each_entry(qp, &f->hash[hash], list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); write_unlock(&f->lock); @@ -233,9 +235,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &f->hash[hash]); - list_add_tail(&qp->lru_list, &nf->lru_list); nf->nqueues++; write_unlock(&f->lock); + inet_frag_lru_add(nf, qp); return qp; } @@ -250,7 +252,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - atomic_add(f->qsize, &nf->mem); + add_frag_mem_limit(q, f->qsize); + setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); @@ -275,17 +278,33 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, __releases(&f->lock) { struct inet_frag_queue *q; - struct hlist_node *n; + int depth = 0; - hlist_for_each_entry(q, n, &f->hash[hash], list) { + hlist_for_each_entry(q, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); read_unlock(&f->lock); return q; } + depth++; } read_unlock(&f->lock); - return inet_frag_create(nf, f, key); + if (depth <= INETFRAGS_MAXDEPTH) + return inet_frag_create(nf, f, key); + else + return ERR_PTR(-ENOBUFS); } EXPORT_SYMBOL(inet_frag_find); + +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix) +{ + static const char msg[] = "inet_frag_find: Fragment hash bucket" + " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) + ". 
Dropping fragment.\n"; + + if (PTR_ERR(q) == -ENOBUFS) + LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); +} +EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fa3ae81..6af375a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -39,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; + tb->fastreuseport = 0; tb->num_owners = 0; INIT_HLIST_HEAD(&tb->owners); hlist_add_head(&tb->node, &head->chain); @@ -119,13 +120,12 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) * that the listener socket's icsk_bind_hash is the same * as that of the child socket. We have to look up or * create a new bind bucket for the child here. */ - struct hlist_node *node; - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), sock_net(sk)) && tb->port == port) break; } - if (!node) { + if (!tb) { tb = inet_bind_bucket_create(table->bind_bucket_cachep, sock_net(sk), head, port); if (!tb) { @@ -151,16 +151,16 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && !ipv6_only_sock(sk)) { __be32 rcv_saddr = inet->inet_rcv_saddr; - score = sk->sk_family == PF_INET ? 1 : 0; + score = sk->sk_family == PF_INET ? 
2 : 1; if (rcv_saddr) { if (rcv_saddr != daddr) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -176,6 +176,7 @@ static inline int compute_score(struct sock *sk, struct net *net, struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif) { @@ -183,17 +184,29 @@ struct sock *__inet_lookup_listener(struct net *net, struct hlist_nulls_node *node; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; sk_nulls_for_each_rcu(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { result = sk; hiscore = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* @@ -479,7 +492,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, int i, remaining, low, high, port; static u32 hint; u32 offset = hint + port_offset; - struct hlist_node *node; struct inet_timewait_sock *tw = NULL; inet_get_local_port_range(&low, &high); @@ -498,10 +510,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * because the established check is already * unique enough. 
*/ - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), net) && tb->port == port) { - if (tb->fastreuse >= 0) + if (tb->fastreuse >= 0 || + tb->fastreuseport >= 0) goto next_port; WARN_ON(hlist_empty(&tb->owners)); if (!check_established(death_row, sk, @@ -518,6 +531,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, break; } tb->fastreuse = -1; + tb->fastreuseport = -1; goto ok; next_port: diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 2784db3..1f27c9f 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -216,7 +216,6 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, const int slot) { struct inet_timewait_sock *tw; - struct hlist_node *node; unsigned int killed; int ret; @@ -229,7 +228,7 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, killed = 0; ret = 0; rescan: - inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { + inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) { __inet_twsk_del_dead_node(tw); spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); @@ -438,10 +437,10 @@ void inet_twdr_twcal_tick(unsigned long data) for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { if (time_before_eq(j, now)) { - struct hlist_node *node, *safe; + struct hlist_node *safe; struct inet_timewait_sock *tw; - inet_twsk_for_each_inmate_safe(tw, node, safe, + inet_twsk_for_each_inmate_safe(tw, safe, &twdr->twcal_row[slot]) { __inet_twsk_del_dead_node(tw); __inet_twsk_kill(tw, twdr->hashinfo); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index eb9d63a..52c273e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -122,7 +122,7 @@ int ip_frag_nqueues(struct net *net) int ip_frag_mem(struct net *net) { - return atomic_read(&net->ipv4.frags.mem); + return sum_frag_mem_limit(&net->ipv4.frags); } static int ip_frag_reasm(struct ipq 
*qp, struct sk_buff *prev, @@ -161,13 +161,6 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a) qp->user == arg->user; } -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf->mem); - kfree_skb(skb); -} - static void ip4_frag_init(struct inet_frag_queue *q, void *a) { struct ipq *qp = container_of(q, struct ipq, q); @@ -255,8 +248,7 @@ static void ip_expire(unsigned long arg) if (!head->dev) goto out_rcu_unlock; - /* skb dst is stale, drop it, and perform route lookup again */ - skb_dst_drop(head); + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); @@ -299,14 +291,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); - if (q == NULL) - goto out_nomem; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct ipq, q); - -out_nomem: - LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n")); - return NULL; } /* Is the fragment too far ahead to be part of ipq? 
*/ @@ -340,6 +329,7 @@ static inline int ip_frag_too_far(struct ipq *qp) static int ip_frag_reinit(struct ipq *qp) { struct sk_buff *fp; + unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { atomic_inc(&qp->q.refcnt); @@ -349,9 +339,12 @@ static int ip_frag_reinit(struct ipq *qp) fp = qp->q.fragments; do { struct sk_buff *xp = fp->next; - frag_kfree_skb(qp->q.net, fp); + + sum_truesize += fp->truesize; + kfree_skb(fp); fp = xp; } while (fp); + sub_frag_mem_limit(&qp->q, sum_truesize); qp->q.last_in = 0; qp->q.len = 0; @@ -496,7 +489,8 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - frag_kfree_skb(qp->q.net, free_it); + sub_frag_mem_limit(&qp->q, free_it->truesize); + kfree_skb(free_it); } } @@ -519,7 +513,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - atomic_add(skb->truesize, &qp->q.net->mem); + add_frag_mem_limit(&qp->q, skb->truesize); if (offset == 0) qp->q.last_in |= INET_FRAG_FIRST_IN; @@ -528,12 +522,17 @@ found: qp->q.max_size = skb->len + ihl; if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - qp->q.meat == qp->q.len) - return ip_frag_reasm(qp, prev, dev); + qp->q.meat == qp->q.len) { + unsigned long orefdst = skb->_skb_refdst; - write_lock(&ip4_frags.lock); - list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list); - write_unlock(&ip4_frags.lock); + skb->_skb_refdst = 0UL; + err = ip_frag_reasm(qp, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } + + skb_dst_drop(skb); + inet_frag_lru_move(&qp->q); return -EINPROGRESS; err: @@ -594,7 +593,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. 
*/ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; /* If the first fragment is fragmented itself, we split @@ -617,7 +616,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &qp->q.net->mem); + add_frag_mem_limit(&qp->q, clone->truesize); } skb_push(head, head->data - skb_network_header(head)); @@ -645,7 +644,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } fp = next; } - atomic_sub(sum_truesize, &qp->q.net->mem); + sub_frag_mem_limit(&qp->q, sum_truesize); head->next = NULL; head->dev = dev; @@ -851,14 +850,22 @@ static inline void ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { - /* - * Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to - * measurably harm machine performance. + /* Fragment cache limits. + * + * The fragment memory accounting code, (tries to) account for + * the real memory usage, by measuring both the size of frag + * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue)) + * and the SKB's truesize. + * + * A 64K fragment consumes 129736 bytes (44*2944)+200 + * (1500 truesize == 2944, sizeof(struct ipq) == 200) + * + * We will commit 4MB at one time. Should we cross that limit + * we will prune down to 3MB, making room for approx 8 big 64K + * fragments 8x128k. */ - net->ipv4.frags.high_thresh = 256 * 1024; - net->ipv4.frags.low_thresh = 192 * 1024; + net->ipv4.frags.high_thresh = 4 * 1024 * 1024; + net->ipv4.frags.low_thresh = 3 * 1024 * 1024; /* * Important NOTE! Fragment queue must be destroyed before MSL expires. 
* RFC791 is wrong proposing to prolongate timer each fragment arrival diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a85ae2f..91d66db 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -735,10 +735,36 @@ drop: return 0; } +static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb) +{ + int err; + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL && + tunnel->parms.o_flags&GRE_CSUM) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; + +error: + kfree_skb(skb); + return ERR_PTR(err); +} + static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; const struct iphdr *tiph; struct flowi4 fl4; u8 tos; @@ -750,10 +776,22 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev int gre_hlen; __be32 dst; int mtu; + u8 ttl; + int err; + int pkt_len; - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) - goto tx_error; + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) { + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (!skb->encapsulation) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + old_iph = ip_hdr(skb); if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -812,9 +850,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } + ttl = tiph->ttl; tos = tiph->tos; - if (tos == 1) { - tos = 0; + if (tos & 0x1) { + tos &= ~0x1; if (skb->protocol == htons(ETH_P_IP)) tos = old_iph->tos; else if (skb->protocol == htons(ETH_P_IPV6)) @@ 
-848,7 +887,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); - if ((old_iph->frag_off&htons(IP_DF)) && + if (!skb_is_gso(skb) && + (old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -868,7 +908,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { + if (!skb_is_gso(skb) && + mtu >= IPV6_MIN_MTU && + mtu < skb->len - tunnel->hlen + gre_hlen) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -904,11 +946,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev dev_kfree_skb(skb); skb = new_skb; old_iph = ip_hdr(skb); + /* Warning : tiph value might point to freed memory */ } - skb_reset_transport_header(skb); skb_push(skb, gre_hlen); skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(*iph)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); @@ -927,8 +970,11 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; + iph->ttl = ttl; - if ((iph->ttl = tiph->ttl) == 0) { + tunnel_ip_select_ident(skb, old_iph, &rt->dst); + + if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) iph->ttl = old_iph->ttl; #if IS_ENABLED(CONFIG_IPV6) @@ -955,13 +1001,37 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev *ptr = tunnel->parms.o_key; ptr--; } - if (tunnel->parms.o_flags&GRE_CSUM) { + /* Skip GRE checksum if skb is getting offloaded. 
*/ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && + (tunnel->parms.o_flags&GRE_CSUM)) { + int offset = skb_transport_offset(skb); + + if (skb_has_shared_frag(skb)) { + err = __skb_linearize(skb); + if (err) + goto tx_error; + } + *ptr = 0; - *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr)); + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, + skb->len - offset, + 0)); } } - iptunnel_xmit(skb, dev); + nf_reset(skb); + + pkt_len = skb->len - skb_transport_offset(skb); + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } return NETDEV_TX_OK; #if IS_ENABLED(CONFIG_IPV6) @@ -1031,6 +1101,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) mtu = 68; tunnel->hlen = addend; + /* TCP offload with GRE SEQ is not supported. 
*/ + if (!(tunnel->parms.o_flags & GRE_SEQ)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } return mtu; } @@ -1580,6 +1655,9 @@ static void ipgre_tap_setup(struct net_device *dev) dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; + + dev->features |= GRE_FEATURES; + dev->hw_features |= GRE_FEATURES; } static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f1395a6..2bdf802 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -208,13 +208,6 @@ static int ip_local_deliver_finish(struct sk_buff *skb) if (ipprot != NULL) { int ret; - if (!net_eq(net, &init_net) && !ipprot->netns_ok) { - net_info_ratelimited("%s: proto %d isn't netns-ready\n", - __func__, protocol); - kfree_skb(skb); - goto out; - } - if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); @@ -235,9 +228,11 @@ static int ip_local_deliver_finish(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } - } else + kfree_skb(skb); + } else { IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); + consume_skb(skb); + } } } out: diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index f6289bf..ec72645 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -370,7 +370,6 @@ int ip_options_compile(struct net *net, } switch (optptr[3]&0xF) { case IPOPT_TS_TSONLY: - opt->ts = optptr - iph; if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; @@ -381,7 +380,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; if (rt) { spec_dst_fill(&spec_dst, skb); memcpy(&optptr[optptr[2]-1], &spec_dst, 4); @@ -396,7 +394,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); @@ -423,18 +420,18 @@ int 
ip_options_compile(struct net *net, put_unaligned_be32(midtime, timeptr); opt->is_changed = 1; } - } else { + } else if ((optptr[3]&0xF) != IPOPT_TS_PRESPEC) { unsigned int overflow = optptr[3]>>4; if (overflow == 15) { pp_ptr = optptr + 3; goto error; } - opt->ts = optptr - iph; if (skb) { optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); opt->is_changed = 1; } } + opt->ts = optptr - iph; break; case IPOPT_RA: if (optlen < 4) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3e98ed2..5e12dca 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -598,6 +598,7 @@ slow_path: /* for offloaded checksums cleanup checksum before fragmentation */ if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb)) goto fail; + iph = ip_hdr(skb); left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3c9d208..d9c4f11 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -590,7 +590,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_TTL: if (optlen < 1) goto e_inval; - if (val != -1 && (val < 0 || val > 255)) + if (val != -1 && (val < 1 || val > 255)) goto e_inval; inet->uc_ttl = val; break; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index d3ab47e..f01d1b1 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -47,9 +47,12 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } @@ -160,6 +163,7 @@ static const struct net_protocol ipcomp4_protocol = { .handler = xfrm4_rcv, .err_handler = ipcomp4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init ipcomp4_init(void) diff --git a/net/ipv4/ipconfig.c 
b/net/ipv4/ipconfig.c index d763701..bf6c5cf 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -136,6 +136,8 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ +__be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ + __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ @@ -558,6 +560,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (ic_myaddr == NONE) ic_myaddr = tip; ic_servaddr = sip; + ic_addrservaddr = sip; ic_got_reply = IC_RARP; drop_unlock: @@ -1068,7 +1071,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str ic_servaddr = server_id; #ifdef IPCONFIG_DEBUG printk("DHCP: Offered address %pI4 by server %pI4\n", - &ic_myaddr, &ic_servaddr); + &ic_myaddr, &b->iph.saddr); #endif /* The DHCP indicated server address takes * precedence over the bootp header one if @@ -1113,6 +1116,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str ic_dev = dev; ic_myaddr = b->your_ip; ic_servaddr = b->server_ip; + ic_addrservaddr = b->iph.saddr; if (ic_gateway == NONE && b->relay_ip) ic_gateway = b->relay_ip; if (ic_nameservers[0] == NONE) @@ -1268,7 +1272,7 @@ static int __init ic_dynamic(void) printk("IP-Config: Got %s answer from %pI4, ", ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? 
"DHCP" : "BOOTP"), - &ic_servaddr); + &ic_addrservaddr); pr_cont("my address is %pI4\n", &ic_myaddr); return 0; @@ -1390,7 +1394,7 @@ static int __init ip_auto_config(void) unsigned int i; #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); + proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) @@ -1518,7 +1522,8 @@ static int __init ip_auto_config(void) } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 191fc24..8f024d4 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; @@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb_checksum_help(skb)) goto tx_error; + old_iph = ip_hdr(skb); + if (tos & 1) tos = old_iph->tos; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a9454cb..5f95b3a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -828,6 +828,49 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, + int vifi) +{ + int line = MFC_HASH(htonl(INADDR_ANY), htonl(INADDR_ANY)); + struct mfc_cache *c; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == htonl(INADDR_ANY) && + c->mfc_mcastgrp 
== htonl(INADDR_ANY) && + c->mfc_un.res.ttls[vifi] < 255) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, + __be32 mcastgrp, int vifi) +{ + int line = MFC_HASH(mcastgrp, htonl(INADDR_ANY)); + struct mfc_cache *c, *proxy; + + if (mcastgrp == htonl(INADDR_ANY)) + goto skip; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == htonl(INADDR_ANY) && + c->mfc_mcastgrp == mcastgrp) { + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + /* It's ok if the vifi is part of the static tree */ + proxy = ipmr_cache_find_any_parent(mrt, + c->mfc_parent); + if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) + return c; + } + +skip: + return ipmr_cache_find_any_parent(mrt, vifi); +} + /* * Allocate a multicast cache entry */ @@ -1053,7 +1096,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) * MFC cache manipulation by user space mroute daemon */ -static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) +static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { int line; struct mfc_cache *c, *next; @@ -1062,7 +1105,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_free(c); @@ -1073,7 +1117,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) } static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, - struct mfcctl *mfc, int mrtsock) + struct mfcctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1086,7 +1130,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, 
list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { found = true; break; } @@ -1103,7 +1148,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } - if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) + if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && + !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; c = ipmr_cache_alloc(); @@ -1218,7 +1264,7 @@ static void mrtsock_destruct(struct sock *sk) int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct vifctl vif; struct mfcctl mfc; struct net *net = sock_net(sk); @@ -1287,16 +1333,22 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi */ case MRT_ADD_MFC: case MRT_DEL_MFC: + parent = -1; + case MRT_ADD_MFC_PROXY: + case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mfcc_parent; rtnl_lock(); - if (optname == MRT_DEL_MFC) - ret = ipmr_mfc_delete(mrt, &mfc); + if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) + ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, - sk == rtnl_dereference(mrt->mroute_sk)); + sk == rtnl_dereference(mrt->mroute_sk), + parent); rtnl_unlock(); return ret; /* @@ -1749,17 +1801,28 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ipmr_find_vif(mrt, skb->dev); vif = cache->mfc_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { + struct mfc_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of 
the static tree. + */ + cache_proxy = ipmr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { - int true_vifi; - if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -1776,7 +1839,6 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, } cache->mfc_un.res.wrong_if++; - true_vifi = ipmr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -1794,15 +1856,34 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, goto dont_forward; } +forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (cache->mfc_origin == htonl(INADDR_ANY) && + cache->mfc_mcastgrp == htonl(INADDR_ANY)) { + if (true_vifi >= 0 && + true_vifi != cache->mfc_parent && + ip_hdr(skb)->ttl > + cache->mfc_un.res.ttls[cache->mfc_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. 
+ */ + psend = cache->mfc_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((cache->mfc_origin != htonl(INADDR_ANY) || + ct != true_vifi) && + ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1813,6 +1894,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1902,6 +1984,13 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ipmr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2107,7 +2196,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); + if (cache == NULL && skb->dev) { + int vif = ipmr_find_vif(mrt, skb->dev); + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, daddr, vif); + } if (cache == NULL) { struct sk_buff *skb2; struct iphdr *iph; @@ -2609,16 +2703,16 @@ static int __net_init ipmr_net_init(struct net *net) #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops)) + if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops)) goto proc_vif_fail; - if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) + if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops)) goto proc_cache_fail; #endif return 0; #ifdef CONFIG_PROC_FS proc_cache_fail: - proc_net_remove(net, "ip_mr_vif"); + remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: ipmr_rules_exit(net); #endif @@ -2629,8 +2723,8 
@@ fail: static void __net_exit ipmr_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ip_mr_cache"); - proc_net_remove(net, "ip_mr_vif"); + remove_proc_entry("ip_mr_cache", net->proc_net); + remove_proc_entry("ip_mr_vif", net->proc_net); #endif ipmr_rules_exit(net); } diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d8d6f2a..0d755c5 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,19 +36,6 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config IP_NF_QUEUE - tristate "IP Userspace queueing via NETLINK (OBSOLETE)" - depends on NETFILTER_ADVANCED - help - Netfilter has the ability to queue packets to user space: the - netlink device can be used to access them using this driver. - - This option enables the old IPv4-only "ip_queue" implementation - which has been obsoleted by the new "nfnetlink_queue" code (see - CONFIG_NETFILTER_NETLINK_QUEUE). - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n @@ -241,8 +228,8 @@ config IP_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. 
config IP_NF_TARGET_CLUSTERIP - tristate "CLUSTERIP target support (EXPERIMENTAL)" - depends on IP_NF_MANGLE && EXPERIMENTAL + tristate "CLUSTERIP target support" + depends on IP_NF_MANGLE depends on NF_CONNTRACK_IPV4 depends on NETFILTER_ADVANCED select NF_CONNTRACK_MARK diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3ea4127..7dc6a97 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -901,7 +901,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -958,7 +958,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, } t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -1001,7 +1001,7 @@ static int __do_replace(struct net *net, const char *name, t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1158,7 +1158,7 @@ static int do_add_counters(struct net *net, const void __user *user, } t = xt_find_table_lock(net, NFPROTO_ARP, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? 
PTR_ERR(t) : -ENOENT; goto free; } @@ -1646,7 +1646,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 17c5e06..3efcf87 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1090,7 +1090,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1149,7 +1149,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, } t = xt_find_table_lock(net, AF_INET, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) @@ -1189,7 +1189,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1347,7 +1347,7 @@ do_add_counters(struct net *net, const void __user *user, } t = xt_find_table_lock(net, AF_INET, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? 
PTR_ERR(t) : -ENOENT; goto free; } @@ -1931,7 +1931,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 75e33a7..5852b24 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -657,7 +657,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) static ssize_t clusterip_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ofs) { - struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data; + struct clusterip_config *c = PDE(file_inode(file))->data; #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; unsigned long nodenum; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 51f13f8..04b18c1 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -81,6 +81,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); memset(tcph, 0, sizeof(*tcph)); tcph->source = oth->dest; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b5ef3cb..7d168dc 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -88,10 +88,8 @@ static void ulog_send(unsigned int nlgroupnum) { ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; - if (timer_pending(&ub->timer)) { - pr_debug("ulog_send: timer was pending, deleting\n"); - del_timer(&ub->timer); - } + pr_debug("ulog_send: timer is deleting\n"); + del_timer(&ub->timer); if (!ub->skb) { pr_debug("ulog_send: 
nothing to send\n"); @@ -426,10 +424,8 @@ static void __exit ulog_tg_exit(void) /* remove pending timers and free allocated skb's */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) { - pr_debug("timer was pending, deleting\n"); - del_timer(&ub->timer); - } + pr_debug("timer is deleting\n"); + del_timer(&ub->timer); if (ub->skb) { kfree_skb(ub->skb); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c301300..c49dcd0 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -66,6 +66,12 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, return dev_match; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + return rt && (rt->rt_flags & RTCF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info; @@ -76,7 +82,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) info = par->matchinfo; invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index da2c8a3..eeaff7e 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -124,23 +124,28 @@ nf_nat_ipv4_fn(unsigned int hooknum, ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; - } else + } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? 
"SRC" : "DST", ct); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; + } break; default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_DROP; - } + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; } return nf_nat_packet(ct, ctinfo, hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; } static unsigned int diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index fcdd0c2..2820aa1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -100,7 +100,6 @@ static unsigned int ipv4_helper(unsigned int hooknum, enum ip_conntrack_info ctinfo; const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; - unsigned int ret; /* This is where we call the helper: as the packet goes out. 
*/ ct = nf_ct_get(skb, &ctinfo); @@ -116,13 +115,8 @@ static unsigned int ipv4_helper(unsigned int hooknum, if (!helper) return NF_ACCEPT; - ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), - ct, ctinfo); - if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { - nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, - "nf_ct_%s: dropping packet", helper->name); - } - return ret; + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), + ct, ctinfo); } static unsigned int ipv4_confirm(unsigned int hooknum, @@ -420,54 +414,43 @@ static int ipv4_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_tcp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n"); + pr_err("nf_conntrack_tcp4: pernet registration failed\n"); goto out_tcp; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n"); + pr_err("nf_conntrack_udp4: pernet registration failed\n"); goto out_udp; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_icmp); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp); if (ret < 0) { - pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n"); + pr_err("nf_conntrack_icmp4: pernet registration failed\n"); goto out_icmp; } - ret = nf_conntrack_l3proto_register(net, - &nf_conntrack_l3proto_ipv4); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); if (ret < 0) { - pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n"); + pr_err("nf_conntrack_ipv4: pernet registration failed\n"); goto out_ipv4; } return 0; out_ipv4: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmp); + nf_ct_l4proto_pernet_unregister(net, 
&nf_conntrack_l4proto_icmp); out_icmp: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); out_udp: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); out_tcp: return ret; } static void ipv4_net_exit(struct net *net) { - nf_conntrack_l3proto_unregister(net, - &nf_conntrack_l3proto_ipv4); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmp); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp4); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp4); + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); } static struct pernet_operations ipv4_net_ops = { @@ -500,16 +483,49 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) pr_err("nf_conntrack_ipv4: can't register hooks.\n"); goto cleanup_pernet; } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n"); + goto cleanup_tcp4; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n"); + goto cleanup_udp4; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); + goto cleanup_icmpv4; + } + #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) ret = nf_conntrack_ipv4_compat_init(); if (ret < 0) - goto cleanup_hooks; + goto cleanup_proto; #endif return 
ret; #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + cleanup_proto: + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); +#endif + cleanup_icmpv4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + cleanup_udp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + cleanup_tcp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); cleanup_hooks: nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); -#endif cleanup_pernet: unregister_pernet_subsys(&ipv4_net_ops); cleanup_sockopt: @@ -523,6 +539,10 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) nf_conntrack_ipv4_compat_fini(); #endif + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); unregister_pernet_subsys(&ipv4_net_ops); nf_unregister_sockopt(&so_getorigdst); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 9682b36..f2ca127 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -417,12 +417,12 @@ static int __net_init ip_conntrack_net_init(struct net *net) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops); + proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440, - &ip_exp_file_ops); + proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net, + &ip_exp_file_ops); if (!proc_exp) goto err2; @@ -433,9 +433,9 @@ static int __net_init ip_conntrack_net_init(struct net *net) return 0; err3: - 
proc_net_remove(net, "ip_conntrack_expect"); + remove_proc_entry("ip_conntrack_expect", net->proc_net); err2: - proc_net_remove(net, "ip_conntrack"); + remove_proc_entry("ip_conntrack", net->proc_net); err1: return -ENOMEM; } @@ -443,8 +443,8 @@ err1: static void __net_exit ip_conntrack_net_exit(struct net *net) { remove_proc_entry("ip_conntrack", net->proc_net_stat); - proc_net_remove(net, "ip_conntrack_expect"); - proc_net_remove(net, "ip_conntrack"); + remove_proc_entry("ip_conntrack_expect", net->proc_net); + remove_proc_entry("ip_conntrack", net->proc_net); } static struct pernet_operations ip_conntrack_net_ops = { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8f3d054..2e91006 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -322,8 +322,8 @@ void ping_err(struct sk_buff *skb, u32 info) struct iphdr *iph = (struct iphdr *)skb->data; struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); struct inet_sock *inet_sock; - int type = icmph->type; - int code = icmph->code; + int type = icmp_hdr(skb)->type; + int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; @@ -738,6 +738,7 @@ struct proto ping_prot = { .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = ping_v4_hash, .unhash = ping_v4_unhash, .get_port = ping_v4_get_port, @@ -888,7 +889,7 @@ static int ping_proc_register(struct net *net) struct proc_dir_entry *p; int rc = 0; - p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops); + p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops); if (!p) rc = -ENOMEM; return rc; @@ -896,7 +897,7 @@ static int ping_proc_register(struct net *net) static void ping_proc_unregister(struct net *net) { - proc_net_remove(net, "icmp"); + remove_proc_entry("icmp", net->proc_net); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8de53e1..32030a2 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -471,28 +471,29 
@@ static const struct file_operations netstat_seq_fops = { static __net_init int ip_proc_init_net(struct net *net) { - if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops)) + if (!proc_create("sockstat", S_IRUGO, net->proc_net, + &sockstat_seq_fops)) goto out_sockstat; - if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops)) + if (!proc_create("netstat", S_IRUGO, net->proc_net, &netstat_seq_fops)) goto out_netstat; - if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops)) + if (!proc_create("snmp", S_IRUGO, net->proc_net, &snmp_seq_fops)) goto out_snmp; return 0; out_snmp: - proc_net_remove(net, "netstat"); + remove_proc_entry("netstat", net->proc_net); out_netstat: - proc_net_remove(net, "sockstat"); + remove_proc_entry("sockstat", net->proc_net); out_sockstat: return -ENOMEM; } static __net_exit void ip_proc_exit_net(struct net *net) { - proc_net_remove(net, "snmp"); - proc_net_remove(net, "netstat"); - proc_net_remove(net, "sockstat"); + remove_proc_entry("snmp", net->proc_net); + remove_proc_entry("netstat", net->proc_net); + remove_proc_entry("sockstat", net->proc_net); } static __net_initdata struct pernet_operations ip_proc_ops = { diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 0f9d09f..ce84846 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -37,6 +37,12 @@ const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { + if (!prot->netns_ok) { + pr_err("Protocol %u is not namespace aware, cannot register.\n", + protocol); + return -EINVAL; + } + return !cmpxchg((const struct net_protocol **)&inet_protos[protocol], NULL, prot) ? 
0 : -1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 73d1e4d..dd44e0a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -111,9 +111,7 @@ EXPORT_SYMBOL_GPL(raw_unhash_sk); static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) { - struct hlist_node *node; - - sk_for_each_from(sk, node) { + sk_for_each_from(sk) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && inet->inet_num == num && @@ -894,6 +892,7 @@ struct proto raw_prot = { .recvmsg = raw_recvmsg, .bind = raw_bind, .backlog_rcv = raw_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw_sock), @@ -913,9 +912,7 @@ static struct sock *raw_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { - struct hlist_node *node; - - sk_for_each(sk, node, &state->h->ht[state->bucket]) + sk_for_each(sk, &state->h->ht[state->bucket]) if (sock_net(sk) == seq_file_net(seq)) goto found; } @@ -1049,7 +1046,7 @@ static const struct file_operations raw_seq_fops = { static __net_init int raw_init_net(struct net *net) { - if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops)) + if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops)) return -ENOMEM; return 0; @@ -1057,7 +1054,7 @@ static __net_init int raw_init_net(struct net *net) static __net_exit void raw_exit_net(struct net *net) { - proc_net_remove(net, "raw"); + remove_proc_entry("raw", net->proc_net); } static __net_initdata struct pernet_operations raw_net_ops = { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 844a9ef..6e28514 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -117,15 +117,11 @@ #define RT_GC_TIMEOUT (300*HZ) static int ip_rt_max_size; -static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; -static int ip_rt_gc_interval __read_mostly = 60 * HZ; -static int ip_rt_gc_min_interval __read_mostly = HZ / 2; 
static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; -static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; @@ -384,8 +380,8 @@ static int __net_init ip_rt_do_proc_init(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(net, "rt_cache", S_IRUGO, - &rt_cache_seq_fops); + pde = proc_create("rt_cache", S_IRUGO, net->proc_net, + &rt_cache_seq_fops); if (!pde) goto err1; @@ -912,6 +908,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) struct dst_entry *dst = &rt->dst; struct fib_result res; + if (dst_metric_locked(dst, RTAX_MTU)) + return; + if (dst->dev->mtu < mtu) return; @@ -962,7 +961,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, } EXPORT_SYMBOL_GPL(ipv4_update_pmtu); -void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; @@ -975,6 +974,53 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) ip_rt_put(rt); } } + +void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; + struct dst_entry *dst; + bool new = false; + + bh_lock_sock(sk); + rt = (struct rtable *) __sk_dst_get(sk); + + if (sock_owned_by_user(sk) || !rt) { + __ipv4_sk_update_pmtu(skb, sk, mtu); + goto out; + } + + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + + if (!__sk_dst_check(sk, 0)) { + rt = 
ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) + goto out; + + new = true; + } + + __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); + + dst = dst_check(&rt->dst, 0); + if (!dst) { + if (new) + dst_release(&rt->dst); + + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) + goto out; + + new = true; + } + + if (new) + __sk_dst_set(sk, &rt->dst); + +out: + bh_unlock_sock(sk); +} EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, @@ -1120,7 +1166,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) if (!mtu || time_after_eq(jiffies, rt->dst.expires)) mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu && rt_is_output_route(rt)) + if (mtu) return mtu; mtu = dst->dev->mtu; @@ -2373,6 +2419,11 @@ void ip_rt_multicast_event(struct in_device *in_dev) } #ifdef CONFIG_SYSCTL +static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; +static int ip_rt_gc_interval __read_mostly = 60 * HZ; +static int ip_rt_gc_min_interval __read_mostly = HZ / 2; +static int ip_rt_gc_elasticity __read_mostly = 8; + static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b236ef0..397e0f6 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -232,7 +232,8 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, * * return false if we decode an option that should not be. */ -bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) +bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, + struct net *net, bool *ecn_ok) { /* echoed timestamp, lowest bits contain options */ u32 options = tcp_opt->rcv_tsecr & TSMASK; @@ -247,7 +248,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) tcp_opt->sack_ok = (options & (1 << 4)) ? 
TCP_SACK_SEEN : 0; *ecn_ok = (options >> 5) & 1; - if (*ecn_ok && !sysctl_tcp_ecn) + if (*ecn_ok && !net->ipv4.sysctl_tcp_ecn) return false; if (tcp_opt->sack_ok && !sysctl_tcp_sack) @@ -295,7 +296,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; ret = NULL; @@ -348,8 +349,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), - RT_SCOPE_UNIVERSE, IPPROTO_TCP, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d84400b..960fd29 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -27,6 +27,7 @@ #include <net/tcp_memcontrol.h> static int zero; +static int one = 1; static int two = 2; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; @@ -232,8 +233,8 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } -int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) { ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; @@ -538,13 +539,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_ecn", - .data = &sysctl_tcp_ecn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { 
.procname = "tcp_dsack", .data = &sysctl_tcp_dsack, .maxlen = sizeof(int), @@ -556,14 +550,16 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "tcp_rmem", .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "tcp_app_win", @@ -637,13 +633,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_tcp_congestion_control, }, { - .procname = "tcp_abc", - .data = &sysctl_tcp_abc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_mtu_probing", .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), @@ -786,7 +775,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &one }, { .procname = "udp_wmem_min", @@ -794,7 +783,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &one }, { } }; @@ -850,6 +839,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = ipv4_ping_group_range, }, { + .procname = "tcp_ecn", + .data = &init_net.ipv4.sysctl_tcp_ecn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "tcp_mem", .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), .mode = 0644, @@ -882,6 +878,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) &net->ipv4.sysctl_icmp_ratemask; table[6].data = &net->ipv4.sysctl_ping_group_range; + table[7].data = + &net->ipv4.sysctl_tcp_ecn; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ca2536..e220207 100644 --- 
a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk) tcp_enable_early_retrans(tp); icsk->icsk_ca_ops = &tcp_init_congestion_ops; + tp->tsoffset = 0; + sk->sk_state = TCP_CLOSE; sk->sk_write_space = sk_stream_write_space; @@ -773,7 +775,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) * Make sure that we have exactly size bytes * available to the caller, no more, no less. */ - skb->avail_size = size; + skb->reserved_tailroom = skb->end - skb->tail - size; return skb; } __kfree_skb(skb); @@ -895,6 +897,7 @@ new_segment: get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); } + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; skb->len += copy; skb->data_len += copy; @@ -1406,10 +1409,10 @@ static void tcp_service_net_dma(struct sock *sk, bool wait) return; last_issued = tp->ucopy.dma_cookie; - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + dma_async_issue_pending(tp->ucopy.dma_chan); do { - if (dma_async_memcpy_complete(tp->ucopy.dma_chan, + if (dma_async_is_tx_complete(tp->ucopy.dma_chan, last_issued, &done, &used) == DMA_SUCCESS) { /* Safe to free early-copied skbs now */ @@ -1428,12 +1431,12 @@ static void tcp_service_net_dma(struct sock *sk, bool wait) } #endif -static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) +static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; u32 offset; - skb_queue_walk(&sk->sk_receive_queue, skb) { + while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { offset = seq - TCP_SKB_CB(skb)->seq; if (tcp_hdr(skb)->syn) offset--; @@ -1441,6 +1444,11 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) *off = offset; return skb; } + /* This looks weird, but this can happen if TCP collapsing + * splitted a fat GRO packet, while we released socket lock + * in skb_splice_bits() + */ + sk_eat_skb(sk, skb, false); } return NULL; } @@ -1482,7 +1490,7 @@ int 
tcp_read_sock(struct sock *sk, read_descriptor_t *desc, break; } used = recv_actor(desc, skb, offset, len); - if (used < 0) { + if (used <= 0) { if (!copied) copied = used; break; @@ -1520,8 +1528,10 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ - if (copied > 0) + if (copied > 0) { + tcp_recv_skb(sk, seq, &offset); tcp_cleanup_rbuf(sk, copied); + } return copied; } EXPORT_SYMBOL(tcp_read_sock); @@ -1744,7 +1754,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tcp_service_net_dma(sk, true); tcp_cleanup_rbuf(sk, copied); } else - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + dma_async_issue_pending(tp->ucopy.dma_chan); } #endif if (copied >= target) { @@ -1837,7 +1847,7 @@ do_prequeue: break; } - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + dma_async_issue_pending(tp->ucopy.dma_chan); if ((offset + used) == skb->len) copied_early = true; @@ -2280,7 +2290,6 @@ int tcp_disconnect(struct sock *sk, int flags) tp->packets_out = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); @@ -2704,6 +2713,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else err = -EINVAL; break; + case TCP_TIMESTAMP: + if (!tp->repair) + err = -EPERM; + else + tp->tsoffset = val - tcp_time_stamp; + break; default: err = -ENOPROTOOPT; break; @@ -2952,6 +2967,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + case TCP_TIMESTAMP: + val = tcp_time_stamp + tp->tsoffset; + break; default: return -ENOPROTOOPT; } @@ -3025,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_GRE | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; @@ -3236,7 +3255,7 @@ 
__tcp_alloc_md5sig_pool(struct sock *sk) struct crypto_hash *hash; hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (!hash || IS_ERR(hash)) + if (IS_ERR_OR_NULL(hash)) goto out_free; per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 291f2ed..019c238 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -310,35 +310,24 @@ void tcp_slow_start(struct tcp_sock *tp) { int cnt; /* increase in packets */ unsigned int delta = 0; + u32 snd_cwnd = tp->snd_cwnd; - /* RFC3465: ABC Slow start - * Increase only after a full MSS of bytes is acked - * - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) - return; + if (unlikely(!snd_cwnd)) { + pr_err_once("snd_cwnd is nul, please report this bug.\n"); + snd_cwnd = 1U; + } if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ else - cnt = tp->snd_cwnd; /* exponential increase */ - - /* RFC3465: ABC - * We MAY increase by 2 if discovered delayed ack - */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) - cnt <<= 1; - tp->bytes_acked = 0; + cnt = snd_cwnd; /* exponential increase */ tp->snd_cwnd_cnt += cnt; - while (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - tp->snd_cwnd_cnt -= tp->snd_cwnd; + while (tp->snd_cwnd_cnt >= snd_cwnd) { + tp->snd_cwnd_cnt -= snd_cwnd; delta++; } - tp->snd_cwnd = min(tp->snd_cwnd + delta, tp->snd_cwnd_clamp); + tp->snd_cwnd = min(snd_cwnd + delta, tp->snd_cwnd_clamp); } EXPORT_SYMBOL_GPL(tcp_slow_start); @@ -372,20 +361,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* In "safe" area, increase. */ if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); - /* In dangerous area, increase slowly. 
*/ - else if (sysctl_tcp_abc) { - /* RFC3465: Appropriate Byte Count - * increase once for each full cwnd acked - */ - if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { - tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } else { + else tcp_cong_avoid_ai(tp, tp->snd_cwnd); - } } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a136925..13b9c08 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -81,8 +81,6 @@ int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; EXPORT_SYMBOL(sysctl_tcp_reordering); -int sysctl_tcp_ecn __read_mostly = 2; -EXPORT_SYMBOL(sysctl_tcp_ecn); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; @@ -100,7 +98,6 @@ int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_abc __read_mostly; int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DATA 0x01 /* Incoming frame contained data. 
*/ @@ -116,6 +113,7 @@ int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -2009,7 +2007,6 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; tp->frto_counter = 0; - tp->bytes_acked = 0; tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); @@ -2058,17 +2055,13 @@ void tcp_enter_loss(struct sock *sk, int how) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tp->bytes_acked = 0; tcp_clear_retrans_partial(tp); if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - if (!how) { - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. 
*/ - tp->undo_marker = tp->snd_una; - } else { + tp->undo_marker = tp->snd_una; + if (how) { tp->sacked_out = 0; tp->fackets_out = 0; } @@ -2686,7 +2679,6 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; - tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; @@ -2737,7 +2729,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; - tp->bytes_acked = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; tcp_init_cwnd_reduction(sk, set_ssthresh); @@ -3419,7 +3410,6 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) { tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; TCP_ECN_queue_cwr(tp); tcp_moderate_cwnd(tp); } @@ -3504,6 +3494,11 @@ static bool tcp_process_frto(struct sock *sk, int flag) } } else { if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { + if (!tcp_packets_in_flight(tp)) { + tcp_enter_frto_loss(sk, 2, flag); + return true; + } + /* Prevent sending of new data. */ tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)); @@ -3570,6 +3565,27 @@ static void tcp_send_challenge_ack(struct sock *sk) } } +static void tcp_store_ts_recent(struct tcp_sock *tp) +{ + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); +} + +static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) +{ + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. + */ + + if (tcp_paws_check(&tp->rx_opt, 0)) + tcp_store_ts_recent(tp); + } +} + /* This routine deals with incoming acks, but not outgoing ones. 
*/ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3610,18 +3626,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, prior_snd_una)) flag |= FLAG_SND_UNA_ADVANCED; - if (sysctl_tcp_abc) { - if (icsk->icsk_ca_state < TCP_CA_CWR) - tp->bytes_acked += ack - prior_snd_una; - else if (icsk->icsk_ca_state == TCP_CA_Loss) - /* we assume just one segment left network */ - tp->bytes_acked += min(ack - prior_snd_una, - tp->mss_cache); - } - prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + if (flag & FLAG_UPDATE_TS_RECENT) + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -3872,7 +3885,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); + tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; return true; } return false; @@ -3896,7 +3909,11 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, if (tcp_parse_aligned_timestamp(tp, th)) return true; } + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + if (tp->rx_opt.saw_tstamp) + tp->rx_opt.rcv_tsecr -= tp->tsoffset; + return true; } @@ -3938,27 +3955,6 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif -static inline void tcp_store_ts_recent(struct tcp_sock *tp) -{ - tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = get_seconds(); -} - -static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) -{ - if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { - /* PAWS bug workaround wrt. 
ACK frames, the PAWS discard - * extra check below makes sure this can only happen - * for pure ACK frames. -DaveM - * - * Not only, also it occurs for expired timestamps. - */ - - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); - } -} - /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. If this ACK does _not_ change critical state (seqs, window) @@ -5493,6 +5489,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tcp_checksum_complete_user(sk, skb)) goto csum_error; + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; + /* Predicted packet is in window by definition. * seq == rcv_nxt and rcv_wup <= rcv_nxt. * Hence, check seq<=rcv_wup reduces to: @@ -5504,9 +5503,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ @@ -5543,6 +5539,9 @@ slow_path: if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) goto csum_error; + if (!th->ack && !th->rst) + goto discard; + /* * Standard slow path. */ @@ -5551,14 +5550,9 @@ slow_path: return 0; step5: - if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) goto discard; - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ @@ -5646,8 +5640,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, * the remote receives only the retransmitted (regular) SYNs: either * the original SYN-data or the corresponding SYN-ACK is lost. 
*/ - syn_drop = (cookie->len <= 0 && data && - inet_csk(sk)->icsk_retransmits); + syn_drop = (cookie->len <= 0 && data && tp->total_retrans); tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); @@ -5675,6 +5668,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); + if (tp->rx_opt.saw_tstamp) + tp->rx_opt.rcv_tsecr -= tp->tsoffset; if (th->ack) { /* rfc793: @@ -5984,12 +5979,17 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (tcp_check_req(sk, skb, req, NULL, true) == NULL) goto discard; } + + if (!th->ack && !th->rst) + goto discard; + if (!tcp_validate_incoming(sk, skb, th, 0)) return 0; /* step 5: check the ACK field */ - if (th->ack) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; + if (true) { + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: @@ -6138,13 +6138,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } break; } - } else - goto discard; - - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + } /* step 6: check the URG bit */ tcp_urg(sk, skb, th); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 54139fa..d09203c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -274,13 +274,6 @@ static void tcp_v4_mtu_reduced(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 mtu = tcp_sk(sk)->mtu_info; - /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs - * send out by Linux are always <576bytes so they should go through - * unfragmented). 
- */ - if (sk->sk_state == TCP_LISTEN) - return; - dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -369,11 +362,10 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) * We do take care of PMTU discovery (RFC1191) special case : * we can receive locally generated ICMP messages while socket is held. */ - if (sock_owned_by_user(sk) && - type != ICMP_DEST_UNREACH && - code != ICMP_FRAG_NEEDED) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); - + if (sock_owned_by_user(sk)) { + if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + } if (sk->sk_state == TCP_CLOSE) goto out; @@ -409,6 +401,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = info; if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); @@ -497,6 +496,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) * errors returned from accept(). */ inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; case TCP_SYN_SENT: @@ -657,7 +657,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. 
*/ sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, ip_hdr(skb)->daddr, + &tcp_hashinfo, ip_hdr(skb)->saddr, + th->source, ip_hdr(skb)->daddr, ntohs(th->source), inet_iif(skb)); /* don't send rst if it can't find key */ if (!sk1) @@ -725,7 +726,7 @@ release_sk1: */ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 ts, int oif, + u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) { @@ -746,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); - if (ts) { + if (tsecr) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - rep.opt[1] = htonl(tcp_time_stamp); - rep.opt[2] = htonl(ts); + rep.opt[1] = htonl(tsval); + rep.opt[2] = htonl(tsecr); arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; } @@ -766,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, #ifdef CONFIG_TCP_MD5SIG if (key) { - int offset = (ts) ? 3 : 0; + int offset = (tsecr) ? 3 : 0; rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -801,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -820,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? 
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_time_stamp, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -951,7 +954,6 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct hlist_node *pos; unsigned int size = sizeof(struct in_addr); struct tcp_md5sig_info *md5sig; @@ -965,7 +967,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { + hlist_for_each_entry_rcu(key, &md5sig->head, node) { if (key->family != family) continue; if (!memcmp(&key->addr, addr, size)) @@ -1066,14 +1068,14 @@ static void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct hlist_node *pos, *n; + struct hlist_node *n; struct tcp_md5sig_info *md5sig; md5sig = rcu_dereference_protected(tp->md5sig_info, 1); if (!hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); - hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { + hlist_for_each_entry_safe(key, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); @@ -1501,8 +1503,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * clogging syn queue with openreqs with exponentially increasing * timeout. 
*/ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; + } req = inet_reqsk_alloc(&tcp_request_sock_ops); if (!req) @@ -1568,7 +1572,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb); + TCP_ECN_create_request(req, skb, sock_net(sk)); if (want_cookie) { isn = cookie_v4_init_sequence(sk, skb, &req->mss); @@ -1667,6 +1671,7 @@ drop_and_release: drop_and_free: reqsk_free(req); drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; } EXPORT_SYMBOL(tcp_v4_conn_request); @@ -2074,6 +2079,7 @@ do_time_wait: case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); if (sk2) { @@ -2609,7 +2615,7 @@ EXPORT_SYMBOL(tcp_proc_register); void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) { - proc_net_remove(net, afinfo->name); + remove_proc_entry(afinfo->name, net->proc_net); } EXPORT_SYMBOL(tcp_proc_unregister); @@ -2888,6 +2894,7 @@ EXPORT_SYMBOL(tcp_prot); static int __net_init tcp_sk_init(struct net *net) { + net->ipv4.sysctl_tcp_ecn = 2; return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f35f2df..b83a49c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -102,6 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.saw_tstamp) { + tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; tmp_opt.ts_recent = tcptw->tw_ts_recent; tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); @@ -288,6 +289,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_rcv_wnd = 
tcp_receive_window(tp); tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + tcptw->tw_ts_offset = tp->tsoffset; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { @@ -446,7 +448,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, */ newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->bytes_acked = 0; newtp->frto_counter = 0; newtp->frto_highmark = 0; @@ -500,6 +501,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } + newtp->tsoffset = 0; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ if (newtp->af_specific->md5_lookup(sk, newsk)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5d45159..509912a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -314,7 +314,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); tp->ecn_flags = 0; - if (sysctl_tcp_ecn == 1) { + if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; } @@ -622,7 +622,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { opts->options |= OPTION_TS; - opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -806,7 +806,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcb ? tcb->when : 0; + opts->tsval = tcb ? 
tcb->when + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -1298,7 +1298,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); - skb->avail_size -= eat; len -= eat; if (!len) return; @@ -1331,7 +1330,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) /* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; __pskb_trim_head(skb, len); @@ -1351,8 +1350,8 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) return 0; } -/* Calculate MSS. Not accounting for SACKs here. */ -int tcp_mtu_to_mss(struct sock *sk, int pmtu) +/* Calculate MSS not accounting any TCP options. */ +static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1381,13 +1380,17 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) /* Then reserve room for full set of TCP options and 8 bytes of data */ if (mss_now < 48) mss_now = 48; - - /* Now subtract TCP options size, not including SACKs */ - mss_now -= tp->tcp_header_len - sizeof(struct tcphdr); - return mss_now; } +/* Calculate MSS. Not accounting for SACKs here. */ +int tcp_mtu_to_mss(struct sock *sk, int pmtu) +{ + /* Subtract TCP options size, not including SACKs */ + return __tcp_mtu_to_mss(sk, pmtu) - + (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); +} + /* Inverse of above */ int tcp_mss_to_mtu(struct sock *sk, int mss) { @@ -1806,8 +1809,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) goto send_now; } - /* Ok, it looks like it is advisable to defer. */ - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. 
+ * Do not rearm the timer if already set to not break TCP ACK clocking. + */ + if (!tp->tso_deferred) + tp->tso_deferred = 1 | (jiffies << 1); return true; @@ -2382,8 +2388,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - /* make sure skb->data is aligned on arches that require it */ - if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { + /* make sure skb->data is aligned on arches that require it + * and check if ack-trimming & collapsing extended the headroom + * beyond what csum_start can cover. + */ + if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || + skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : @@ -2703,6 +2713,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); + security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) @@ -2930,7 +2941,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) */ if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; - space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - + space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; syn_data = skb_copy_expand(syn, skb_headroom(syn), space, diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 4526fe6..d4943f6 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -234,7 +234,7 @@ static __init int tcpprobe_init(void) if (!tcp_probe.log) goto err0; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, &tcpprobe_fops)) goto err0; ret = register_jprobe(&tcp_jprobe); @@ -244,7 +244,7 @@ static __init int tcpprobe_init(void) pr_info("probe 
registered (port=%d) bufsize=%u\n", port, bufsize); return 0; err1: - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); err0: kfree(tcp_probe.log); return ret; @@ -253,7 +253,7 @@ module_init(tcpprobe_init); static __exit void tcpprobe_exit(void) { - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&tcp_jprobe); kfree(tcp_probe.log); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 79c8dbe..0a073a2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -139,6 +139,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, { struct sock *sk2; struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && @@ -147,6 +148,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuseport || !sk->sk_reuseport || + !uid_eq(uid, sock_i_uid(sk2))) && (*saddr_comp)(sk, sk2)) { if (bitmap) __set_bit(udp_sk(sk2)->udp_port_hash >> log, @@ -169,6 +172,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, { struct sock *sk2; struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); int res = 0; spin_lock(&hslot2->lock); @@ -179,6 +183,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuseport || !sk->sk_reuseport || + !uid_eq(uid, sock_i_uid(sk2))) && (*saddr_comp)(sk, sk2)) { res = 1; break; @@ -337,26 +343,26 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, !ipv6_only_sock(sk)) { struct inet_sock *inet = inet_sk(sk); - score = (sk->sk_family == PF_INET ? 1 : 0); + score = (sk->sk_family == PF_INET ? 
2 : 1); if (inet->inet_rcv_saddr) { if (inet->inet_rcv_saddr != daddr) return -1; - score += 2; + score += 4; } if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; - score += 2; + score += 4; } if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -365,7 +371,6 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, /* * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) */ -#define SCORE2_MAX (1 + 2 + 2 + 2) static inline int compute_score2(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif) @@ -380,21 +385,21 @@ static inline int compute_score2(struct sock *sk, struct net *net, if (inet->inet_num != hnum) return -1; - score = (sk->sk_family == PF_INET ? 1 : 0); + score = (sk->sk_family == PF_INET ? 2 : 1); if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; - score += 2; + score += 4; } if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -409,19 +414,29 @@ static struct sock *udp4_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; - badness = -1; + badness = 0; udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) - goto exact_match; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet_ehashfn(net, daddr, hnum, + saddr, htons(sport)); + matches = 1; + } + } else if (score == badness && reuseport) { + 
matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -431,9 +446,7 @@ begin: */ if (get_nulls_value(node) != slot2) goto begin; - if (result) { -exact_match: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -457,7 +470,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -486,13 +500,24 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, } begin: result = NULL; - badness = -1; + badness = 0; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet_ehashfn(net, daddr, hnum, + saddr, htons(sport)); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -971,7 +996,7 @@ back_from_confirm: sizeof(struct udphdr), &ipc, &rt, msg->msg_flags); err = PTR_ERR(skb); - if (skb && !IS_ERR(skb)) + if (!IS_ERR_OR_NULL(skb)) err = udp_send_skb(skb, fl4); goto out; } @@ -1737,9 +1762,16 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); + if (static_key_false(&udp_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } } /* @@ -1952,6 +1984,7 @@ 
struct proto udp_prot = { .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, .backlog_rcv = __udp_queue_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, @@ -2096,7 +2129,7 @@ EXPORT_SYMBOL(udp_proc_register); void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) { - proc_net_remove(net, afinfo->name); + remove_proc_entry(afinfo->name, net->proc_net); } EXPORT_SYMBOL(udp_proc_unregister); @@ -2279,7 +2312,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6..1f12c8b 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto drop; /* Now we can update and verify the packet length... 
*/ diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ddee0a0..fe5189e 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -142,8 +142,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) for_each_input_rcu(rcv_notify_handlers, handler) handler->handler(skb); - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + err = skb_unclone(skb, GFP_ATOMIC); + if (err) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3be0ac2..9a459be 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -262,21 +262,56 @@ static struct ctl_table xfrm4_policy_table[] = { { } }; -static struct ctl_table_header *sysctl_hdr; -#endif - -static void __init xfrm4_policy_init(void) +static int __net_init xfrm4_net_init(struct net *net) { - xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = xfrm4_policy_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh; + } + + hdr = register_net_sysctl(net, "net/ipv4", table); + if (!hdr) + goto err_reg; + + net->ipv4.xfrm4_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; } -static void __exit xfrm4_policy_fini(void) +static void __net_exit xfrm4_net_exit(struct net *net) { -#ifdef CONFIG_SYSCTL - if (sysctl_hdr) - unregister_net_sysctl_table(sysctl_hdr); + struct ctl_table *table; + + if (net->ipv4.xfrm4_hdr == NULL) + return; + + table = net->ipv4.xfrm4_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv4.xfrm4_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations __net_initdata xfrm4_net_ops = { + .init = xfrm4_net_init, + .exit = xfrm4_net_exit, +}; 
#endif - xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); + +static void __init xfrm4_policy_init(void) +{ + xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); } void __init xfrm4_init(void) @@ -286,8 +321,7 @@ void __init xfrm4_init(void) xfrm4_state_init(); xfrm4_policy_init(); #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4", - xfrm4_policy_table); + register_pernet_subsys(&xfrm4_net_ops); #endif } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 4f7fe72..ed0b9e2 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -11,7 +11,7 @@ menuconfig IPV6 You will still be able to do traditional IPv4 networking as well. For general information about IPv6, see - <http://playground.sun.com/pub/ipng/html/ipng-main.html>. + <https://en.wikipedia.org/wiki/IPv6>. For Linux IPv6 development information, see <http://www.linux-ipv6.org>. For specific information about IPv6 under Linux, read the HOWTO at <http://www.bieringer.de/linux/IPv6/>. @@ -50,16 +50,15 @@ config IPV6_ROUTER_PREF If unsure, say N. config IPV6_ROUTE_INFO - bool "IPv6: Route Information (RFC 4191) support (EXPERIMENTAL)" - depends on IPV6_ROUTER_PREF && EXPERIMENTAL + bool "IPv6: Route Information (RFC 4191) support" + depends on IPV6_ROUTER_PREF ---help--- This is experimental support of Route Information. If unsure, say N. config IPV6_OPTIMISTIC_DAD - bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "IPv6: Enable RFC 4429 Optimistic DAD" ---help--- This is experimental support for optimistic Duplicate Address Detection. It allows for autoconfigured addresses @@ -105,8 +104,7 @@ config INET6_IPCOMP If unsure, say Y. config IPV6_MIP6 - tristate "IPv6: Mobility (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "IPv6: Mobility" select XFRM ---help--- Support for IPv6 Mobility described in RFC 3775. @@ -150,8 +148,7 @@ config INET6_XFRM_MODE_BEET If unsure, say Y. 
config INET6_XFRM_MODE_ROUTEOPTIMIZATION - tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "IPv6: MIPv6 route optimization mode" select XFRM ---help--- Support for MIPv6 route optimization mode. @@ -171,8 +168,8 @@ config IPV6_SIT Saying M here will produce a module called sit. If unsure, say Y. config IPV6_SIT_6RD - bool "IPv6: IPv6 Rapid Deployment (6RD) (EXPERIMENTAL)" - depends on IPV6_SIT && EXPERIMENTAL + bool "IPv6: IPv6 Rapid Deployment (6RD)" + depends on IPV6_SIT default n ---help--- IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon @@ -219,7 +216,6 @@ config IPV6_GRE config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" - depends on EXPERIMENTAL select FIB_RULES ---help--- Support multiple routing tables. @@ -239,8 +235,8 @@ config IPV6_SUBTREES If unsure, say N. config IPV6_MROUTE - bool "IPv6: multicast routing (EXPERIMENTAL)" - depends on IPV6 && EXPERIMENTAL + bool "IPv6: multicast routing" + depends on IPV6 ---help--- Experimental support for IPv6 multicast forwarding. If unsure, say N. @@ -260,7 +256,7 @@ config IPV6_MROUTE_MULTIPLE_TABLES If unsure, say N. config IPV6_PIMSM_V2 - bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)" + bool "IPv6: PIM-SM version 2 support" depends on IPV6_MROUTE ---help--- Support for IPv6 PIM multicast routing protocol PIM-SMv2. 
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 4ea2448..309af19 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o +obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 408cac4a..dae802c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -110,10 +110,6 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } -#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) -#define ADDRCONF_TIMER_FUZZ (HZ / 4) -#define ADDRCONF_TIMER_FUZZ_MAX (HZ) - #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -154,6 +150,11 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event); static int addrconf_ifdown(struct net_device *dev, int how); +static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, + int plen, + const struct net_device *dev, + u32 flags, u32 noflags); + static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_timer(unsigned long data); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); @@ -167,8 +168,6 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev); -static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); - static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, @@ -243,6 +242,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct 
in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; /* Check if a valid qdisc is available */ static inline bool addrconf_qdisc_ok(const struct net_device *dev) @@ -250,12 +252,6 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev) return !qdisc_tx_is_noop(dev); } -/* Check if a route is valid prefix route */ -static inline int addrconf_is_prefix_route(const struct rt6_info *rt) -{ - return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0; -} - static void addrconf_del_timer(struct inet6_ifaddr *ifp) { if (del_timer(&ifp->timer)) @@ -433,6 +429,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); + /* Join interface-local all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); + /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); @@ -616,10 +615,15 @@ static void dev_forward_change(struct inet6_dev *idev) if (idev->cnf.forwarding) dev_disable_lro(dev); if (dev->flags & IFF_MULTICAST) { - if (idev->cnf.forwarding) + if (idev->cnf.forwarding) { ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); - else + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters); + } else { ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters); + } } list_for_each_entry(ifa, &idev->addr_list, if_list) { @@ -831,7 +835,7 @@ out2: rcu_read_unlock_bh(); if (likely(err == 0)) 
- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); + inet6addr_notifier_call_chain(NETDEV_UP, ifa); else { kfree(ifa); ifa = ERR_PTR(err); @@ -921,7 +925,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ipv6_ifa_notify(RTM_DELADDR, ifp); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); /* * Purge or update corresponding prefix @@ -941,17 +945,15 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { struct in6_addr prefix; struct rt6_info *rt; - struct net *net = dev_net(ifp->idev->dev); - struct flowi6 fl6 = {}; ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - fl6.flowi6_oif = ifp->idev->dev->ifindex; - fl6.daddr = prefix; - rt = (struct rt6_info *)ip6_route_lookup(net, &fl6, - RT6_LOOKUP_F_IFACE); - if (rt != net->ipv6.ip6_null_entry && - addrconf_is_prefix_route(rt)) { + rt = addrconf_get_prefix_route(&prefix, + ifp->prefix_len, + ifp->idev->dev, + 0, RTF_GATEWAY | RTF_DEFAULT); + + if (rt) { if (onlink == 0) { ip6_del_rt(rt); rt = NULL; @@ -1054,7 +1056,7 @@ retry: ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, addr_flags) : NULL; - if (!ift || IS_ERR(ift)) { + if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); @@ -1415,11 +1417,10 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { struct inet6_ifaddr *ifp; - struct hlist_node *node; unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -1441,9 +1442,8 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, { unsigned int hash = 
inet6_addr_hash(addr); struct inet6_ifaddr *ifp; - struct hlist_node *node; - hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1483,10 +1483,9 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add { struct inet6_ifaddr *ifp, *result = NULL; unsigned int hash = inet6_addr_hash(addr); - struct hlist_node *node; rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1663,6 +1662,7 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) if (dev->addr_len != IEEE802154_ADDR_LEN) return -1; memcpy(eui, dev->dev_addr, 8); + eui[0] ^= 2; return 0; } @@ -1877,7 +1877,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, continue; if ((rt->rt6i_flags & flags) != flags) continue; - if ((noflags != 0) && ((rt->rt6i_flags & flags) != 0)) + if ((rt->rt6i_flags & noflags) != 0) continue; dst_hold(&rt->dst); break; @@ -2082,7 +2082,7 @@ ok: addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags); - if (!ifp || IS_ERR(ifp)) { + if (IS_ERR_OR_NULL(ifp)) { in6_dev_put(in6_dev); return; } @@ -2527,6 +2527,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) static void init_loopback(struct net_device *dev) { struct inet6_dev *idev; + struct net_device *sp_dev; + struct inet6_ifaddr *sp_ifa; + struct rt6_info *sp_rt; /* ::1 */ @@ -2538,6 +2541,30 @@ static void init_loopback(struct net_device *dev) } add_addr(idev, &in6addr_loopback, 128, IFA_HOST); + + /* Add routes to other interface's IPv6 addresses */ + for_each_netdev(dev_net(dev), sp_dev) { + if (!strcmp(sp_dev->name, dev->name)) + continue; + + idev = __in6_dev_get(sp_dev); + if (!idev) 
+ continue; + + read_lock_bh(&idev->lock); + list_for_each_entry(sp_ifa, &idev->addr_list, if_list) { + + if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) + continue; + + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + + /* Failure cases are ignored */ + if (!IS_ERR(sp_rt)) + ip6_ins_rt(sp_rt); + } + read_unlock_bh(&idev->lock); + } } static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) @@ -2902,11 +2929,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { struct hlist_head *h = &inet6_addr_lst[i]; - struct hlist_node *n; spin_lock_bh(&addrconf_hash_lock); restart: - hlist_for_each_entry_rcu(ifa, n, h, addr_lst) { + hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); addrconf_del_timer(ifa); @@ -2960,7 +2986,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } in6_ifa_put(ifa); @@ -3213,8 +3239,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - struct hlist_node *n; - hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -3239,9 +3264,8 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, { struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - struct hlist_node *n = &ifa->addr_lst; - hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) { + hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; state->offset++; @@ -3250,7 +3274,7 @@ static struct 
inet6_ifaddr *if6_get_next(struct seq_file *seq, while (++state->bucket < IN6_ADDR_HSIZE) { state->offset = 0; - hlist_for_each_entry_rcu_bh(ifa, n, + hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -3320,14 +3344,14 @@ static const struct file_operations if6_fops = { static int __net_init if6_proc_net_init(struct net *net) { - if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops)) + if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops)) return -ENOMEM; return 0; } static void __net_exit if6_proc_net_exit(struct net *net) { - proc_net_remove(net, "if_inet6"); + remove_proc_entry("if_inet6", net->proc_net); } static struct pernet_operations if6_proc_net_ops = { @@ -3352,11 +3376,10 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { int ret = 0; struct inet6_ifaddr *ifp = NULL; - struct hlist_node *n; unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3378,7 +3401,6 @@ static void addrconf_verify(unsigned long foo) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; - struct hlist_node *node; int i; rcu_read_lock_bh(); @@ -3390,7 +3412,7 @@ static void addrconf_verify(unsigned long foo) for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, node, + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; @@ -3861,7 +3883,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -3871,7 +3892,7 @@ static int inet6_dump_addr(struct sk_buff *skb, 
struct netlink_callback *cb, for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (h > s_h || idx > s_idx) @@ -4217,7 +4238,6 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -4226,7 +4246,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; idev = __in6_dev_get(dev); @@ -4789,26 +4809,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __net_init addrconf_init_net(struct net *net) { - int err; + int err = -ENOMEM; struct ipv6_devconf *all, *dflt; - err = -ENOMEM; - all = &ipv6_devconf; - dflt = &ipv6_devconf_dflt; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) - goto err_alloc_all; + dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; - dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) - goto err_alloc_dflt; - } else { - /* these will be inherited by all namespaces */ - dflt->autoconf = ipv6_defaults.autoconf; - dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; - } + /* these will be inherited by all namespaces */ + dflt->autoconf = ipv6_defaults.autoconf; + dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = 
dflt; @@ -4853,22 +4867,6 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -/* - * Device notifier - */ - -int register_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(register_inet6addr_notifier); - -int unregister_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(unregister_inet6addr_notifier); - static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index d051e5f..7210456 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -78,3 +78,22 @@ int __ipv6_addr_type(const struct in6_addr *addr) } EXPORT_SYMBOL(__ipv6_addr_type); +static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); + +int register_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(register_inet6addr_notifier); + +int unregister_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(unregister_inet6addr_notifier); + +int inet6addr_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&inet6addr_chain, val, v); +} +EXPORT_SYMBOL(inet6addr_notifier_call_chain); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index ff76eec..aad6435 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -173,9 +173,8 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex) { - struct hlist_node *pos; struct ip6addrlbl_entry *p; - hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (__ip6addrlbl_match(net, p, addr, type, ifindex)) return p; } @@ -261,9 +260,9 @@ static 
int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) if (hlist_empty(&ip6addrlbl_table.head)) { hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); } else { - struct hlist_node *pos, *n; + struct hlist_node *n; struct ip6addrlbl_entry *p = NULL; - hlist_for_each_entry_safe(p, pos, n, + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && @@ -319,13 +318,13 @@ static int __ip6addrlbl_del(struct net *net, int ifindex) { struct ip6addrlbl_entry *p = NULL; - struct hlist_node *pos, *n; + struct hlist_node *n; int ret = -ESRCH; ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); - hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && net_eq(ip6addrlbl_net(p), net) && p->ifindex == ifindex && @@ -380,11 +379,11 @@ static int __net_init ip6addrlbl_net_init(struct net *net) static void __net_exit ip6addrlbl_net_exit(struct net *net) { struct ip6addrlbl_entry *p = NULL; - struct hlist_node *pos, *n; + struct hlist_node *n; /* Remove all labels belonging to the exiting net */ spin_lock(&ip6addrlbl_table.lock); - hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (net_eq(ip6addrlbl_net(p), net)) { hlist_del_rcu(&p->list); ip6addrlbl_put(p); @@ -505,12 +504,11 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct ip6addrlbl_entry *p; - struct hlist_node *pos; int idx = 0, s_idx = cb->args[0]; int err; rcu_read_lock(); - hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (idx >= s_idx && net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, diff 
--git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b043c60..6b793bf 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -811,11 +811,10 @@ static struct pernet_operations inet6_net_ops = { static int __init inet6_init(void) { - struct sk_buff *dummy_skb; struct list_head *r; int err = 0; - BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); /* Register the socket-side information for inet6_create. */ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index ecc35b9..bb02e17 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -472,7 +472,10 @@ static void ah6_input_done(struct crypto_async_request *base, int err) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); __skb_pull(skb, ah_hlen + hdr_len); - skb_set_transport_header(skb, -hdr_len); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -hdr_len); out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -518,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. 
*/ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; @@ -593,9 +595,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); - skb->transport_header = skb->network_header; __skb_pull(skb, ah_hlen + hdr_len); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -hdr_len); + err = nexthdr; out_free: diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 757a810..5a80f15 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -47,7 +47,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); /* Big ac list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_ac_lock); +static DEFINE_SPINLOCK(ipv6_sk_ac_lock); /* @@ -128,10 +128,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_ac_inc(dev, addr); if (!err) { - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); pac = NULL; } @@ -152,7 +152,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -161,7 +161,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) prev_pac = pac; } if (!pac) { - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); return -ENOENT; } if (prev_pac) @@ -169,7 +169,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) else 
np->ipv6_ac_list = pac->acl_next; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, pac->acl_ifindex); @@ -192,10 +192,10 @@ void ipv6_sock_ac_close(struct sock *sk) if (!np->ipv6_ac_list) return; - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; rcu_read_lock(); @@ -509,7 +509,7 @@ static const struct file_operations ac6_seq_fops = { int __net_init ac6_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops)) return -ENOMEM; return 0; @@ -517,7 +517,7 @@ int __net_init ac6_proc_init(struct net *net) void ac6_proc_exit(struct net *net) { - proc_net_remove(net, "anycast6"); + remove_proc_entry("anycast6", net->proc_net); } #endif diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8edf260..f5a5478 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -30,6 +30,7 @@ #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/tcp_states.h> +#include <net/dsfield.h> #include <linux/errqueue.h> #include <asm/uaccess.h> @@ -356,12 +357,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = serr->port; sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { - sin->sin6_addr = - *(struct in6_addr *)(nh + serr->addr_offset); + const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), + struct ipv6hdr, daddr); + sin->sin6_addr = ip6h->daddr; if (np->sndflow) - sin->sin6_flowinfo = - (*(__be32 *)(nh + serr->addr_offset - 24) & - IPV6_FLOWINFO_MASK); + sin->sin6_flowinfo = ip6_flowinfo(ip6h); if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = IP6CB(skb)->iif; } else { @@ -380,7 +380,7 @@ int ipv6_recv_error(struct 
sock *sk, struct msghdr *msg, int len) if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_ctl(sk, msg, skb); if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = IP6CB(skb)->iif; } else { @@ -468,7 +468,8 @@ out: } -int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) +int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet6_skb_parm *opt = IP6CB(skb); @@ -488,13 +489,14 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } if (np->rxopt.bits.rxtclass) { - int tclass = ipv6_tclass(ipv6_hdr(skb)); + int tclass = ipv6_get_dsfield(ipv6_hdr(skb)); put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } - if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) { - __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK; - put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); + if (np->rxopt.bits.rxflow) { + __be32 flowinfo = ip6_flowinfo((struct ipv6hdr *)nh); + if (flowinfo) + put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } /* HbH is allowed only once */ @@ -597,11 +599,12 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); -int datagram_send_ctl(struct net *net, struct sock *sk, - struct msghdr *msg, struct flowi6 *fl6, - struct ipv6_txoptions *opt, - int *hlimit, int *tclass, int *dontfrag) +int ip6_datagram_send_ctl(struct net *net, struct sock *sk, + struct msghdr *msg, struct flowi6 *fl6, + struct ipv6_txoptions *opt, + int *hlimit, int *tclass, int *dontfrag) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -871,4 +874,4 @@ int datagram_send_ctl(struct net *net, struct sock *sk, exit_f: return err; } -EXPORT_SYMBOL_GPL(datagram_send_ctl); 
+EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 282f372..40ffd72 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -300,7 +300,10 @@ static int esp_input_done2(struct sk_buff *skb, int err) pskb_trim(skb, skb->len - alen - padlen - 2); __skb_pull(skb, hlen); - skb_set_transport_header(skb, -hdr_len); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -hdr_len); err = nexthdr[1]; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 473f628..07a7d65 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { - IP6CB(skb)->ra = optoff; + IP6CB(skb)->flags |= IP6SKB_ROUTERALERT; + memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4a9fd5..fff5bdd 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -81,10 +81,22 @@ static inline struct sock *icmpv6_sk(struct net *net) return net->ipv6.icmp_sk[smp_processor_id()]; } +static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct net *net = dev_net(skb->dev); + + if (type == ICMPV6_PKT_TOOBIG) + ip6_update_pmtu(skb, net, info, 0, 0); + else if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); +} + static int icmpv6_rcv(struct sk_buff *skb); static const struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, + .err_handler = icmpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 3064785..9bfab19 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -31,25 +31,33 @@ int inet6_csk_bind_conflict(const 
struct sock *sk, const struct inet_bind_bucket *tb, bool relax) { const struct sock *sk2; - const struct hlist_node *node; + int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + kuid_t uid = sock_i_uid((struct sock *)sk); /* We must walk the whole port owner list in this case. -DaveM */ /* * See comment in inet_csk_bind_conflict about sock lookup * vs net namespaces issues. */ - sk_for_each_bound(sk2, node, &tb->owners) { + sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, + sock_i_uid((struct sock *)sk2))))) { + if (ipv6_rcv_saddr_equal(sk, sk2)) + break; + } + } } - return node != NULL; + return sk2 != NULL; } EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index dea17fd..32b4a16 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -158,25 +158,38 @@ static inline int compute_score(struct sock *sk, struct net *net, } struct sock *inet6_lookup_listener(struct net *net, - struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, + struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, + const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; const struct hlist_nulls_node *node; struct sock *result; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; 
sk_nulls_for_each(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { hiscore = score; result = sk; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c new file mode 100644 index 0000000..72d198b --- /dev/null +++ b/net/ipv6/ip6_checksum.c @@ -0,0 +1,97 @@ +#include <net/ip.h> +#include <net/udp.h> +#include <net/udplite.h> +#include <asm/checksum.h> + +#ifndef _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum csum) +{ + + int carry; + __u32 ulen; + __u32 uproto; + __u32 sum = (__force u32)csum; + + sum += (__force u32)saddr->s6_addr32[0]; + carry = (sum < (__force u32)saddr->s6_addr32[0]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[1]; + carry = (sum < (__force u32)saddr->s6_addr32[1]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[2]; + carry = (sum < (__force u32)saddr->s6_addr32[2]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[3]; + carry = (sum < (__force u32)saddr->s6_addr32[3]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[0]; + carry = (sum < (__force u32)daddr->s6_addr32[0]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[1]; + carry = (sum < (__force u32)daddr->s6_addr32[1]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[2]; + carry = (sum < (__force u32)daddr->s6_addr32[2]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[3]; + carry = (sum < (__force u32)daddr->s6_addr32[3]); + sum += carry; + + ulen = (__force u32)htonl((__u32) len); + sum += ulen; + carry = (sum < ulen); + sum += carry; + + uproto = (__force u32)htonl(proto); + sum 
+= uproto; + carry = (sum < uproto); + sum += carry; + + return csum_fold((__force __wsum)sum); +} +EXPORT_SYMBOL(csum_ipv6_magic); +#endif + +int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) +{ + int err; + + UDP_SKB_CB(skb)->partial_cov = 0; + UDP_SKB_CB(skb)->cscov = skb->len; + + if (proto == IPPROTO_UDPLITE) { + err = udplite_checksum_init(skb, uh); + if (err) + return err; + } + + if (uh->check == 0) { + /* RFC 2460 section 8.1 says that we SHOULD log + this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + return 1; + } + if (skb->ip_summed == CHECKSUM_COMPLETE && + !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + skb->len, proto, skb->csum)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (!skb_csum_unnecessary(skb)) + skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len, proto, 0)); + + return 0; +} +EXPORT_SYMBOL(udp6_csum_init); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 710cafd..192dd1a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -224,7 +224,6 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) { struct fib6_table *tb; struct hlist_head *head; - struct hlist_node *node; unsigned int h; if (id == 0) @@ -232,7 +231,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) h = id & (FIB6_TABLE_HASHSZ - 1); rcu_read_lock(); head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (tb->tb6_id == id) { rcu_read_unlock(); return tb; @@ -363,7 +362,6 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct rt6_rtnl_dump_arg arg; struct fib6_walker_t *w; struct fib6_table *tb; - struct hlist_node *node; struct hlist_head *head; int res = 0; @@ -398,7 +396,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB6_TABLE_HASHSZ; 
h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (e < s_e) goto next; res = fib6_dump_table(tb, skb, cb); @@ -1520,14 +1518,13 @@ void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg int prune, void *arg) { struct fib6_table *table; - struct hlist_node *node; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(table, head, tb6_hlist) { read_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, prune, arg); @@ -1540,14 +1537,13 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { struct fib6_table *table; - struct hlist_node *node; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, prune, arg); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 29124b7..b973ed3 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -51,25 +51,38 @@ #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) static atomic_t fl_size = ATOMIC_INIT(0); -static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1]; +static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(unsigned long dummy); static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0); /* FL hash table lock: it protects only of GC */ -static DEFINE_RWLOCK(ip6_fl_lock); +static DEFINE_SPINLOCK(ip6_fl_lock); /* Big socket sock */ -static DEFINE_RWLOCK(ip6_sk_fl_lock); +static DEFINE_SPINLOCK(ip6_sk_fl_lock); +#define for_each_fl_rcu(hash, 
fl) \ + for (fl = rcu_dereference_bh(fl_ht[(hash)]); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) +#define for_each_fl_continue_rcu(fl) \ + for (fl = rcu_dereference_bh(fl->next); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) + +#define for_each_sk_fl_rcu(np, sfl) \ + for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \ + sfl != NULL; \ + sfl = rcu_dereference_bh(sfl->next)) static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { + for_each_fl_rcu(FL_HASH(label), fl) { if (fl->label == label && net_eq(fl->fl_net, net)) return fl; } @@ -80,11 +93,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); fl = __fl_lookup(net, label); - if (fl) - atomic_inc(&fl->users); - read_unlock_bh(&ip6_fl_lock); + if (fl && !atomic_inc_not_zero(&fl->users)) + fl = NULL; + rcu_read_unlock_bh(); return fl; } @@ -96,13 +109,13 @@ static void fl_free(struct ip6_flowlabel *fl) put_pid(fl->owner.pid); release_net(fl->fl_net); kfree(fl->opt); + kfree_rcu(fl, rcu); } - kfree(fl); } static void fl_release(struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); fl->lastuse = jiffies; if (atomic_dec_and_test(&fl->users)) { @@ -119,7 +132,7 @@ static void fl_release(struct ip6_flowlabel *fl) time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd); } - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); } static void ip6_fl_gc(unsigned long dummy) @@ -128,12 +141,13 @@ static void ip6_fl_gc(unsigned long dummy) unsigned long now = jiffies; unsigned long sched = 0; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; - while ((fl=*flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != 
NULL) { if (atomic_read(&fl->users) == 0) { unsigned long ttd = fl->lastuse + fl->linger; if (time_after(ttd, fl->expires)) @@ -156,18 +170,19 @@ static void ip6_fl_gc(unsigned long dummy) if (sched) { mod_timer(&ip6_fl_gc_timer, sched); } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static void __net_exit ip6_fl_purge(struct net *net) { int i; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; - while ((fl = *flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != NULL) { if (net_eq(fl->fl_net, net) && atomic_read(&fl->users) == 0) { *flp = fl->next; @@ -178,7 +193,7 @@ static void __net_exit ip6_fl_purge(struct net *net) flp = &fl->next; } } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static struct ip6_flowlabel *fl_intern(struct net *net, @@ -188,7 +203,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = label & IPV6_FLOWLABEL_MASK; - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; @@ -210,16 +225,16 @@ static struct ip6_flowlabel *fl_intern(struct net *net, lfl = __fl_lookup(net, fl->label); if (lfl != NULL) { atomic_inc(&lfl->users); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return lfl; } } fl->lastuse = jiffies; fl->next = fl_ht[FL_HASH(fl->label)]; - fl_ht[FL_HASH(fl->label)] = fl; + rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); atomic_inc(&fl_size); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return NULL; } @@ -234,17 +249,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { 
fl->lastuse = jiffies; atomic_inc(&fl->users); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return fl; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return NULL; } @@ -255,11 +270,21 @@ void fl6_free_socklist(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; - while ((sfl = np->ipv6_fl_list) != NULL) { + if (!rcu_access_pointer(np->ipv6_fl_list)) + return; + + spin_lock_bh(&ip6_sk_fl_lock); + while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { np->ipv6_fl_list = sfl->next; + spin_unlock_bh(&ip6_sk_fl_lock); + fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); + + spin_lock_bh(&ip6_sk_fl_lock); } + spin_unlock_bh(&ip6_sk_fl_lock); } /* Service routines */ @@ -365,8 +390,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, msg.msg_control = (void*)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk, - &junk, &junk); + err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, + &junk, &junk, &junk); if (err) goto done; err = -EINVAL; @@ -424,7 +449,7 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) + for_each_sk_fl_rcu(np, sfl) count++; if (room <= 0 || @@ -467,11 +492,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_sk_fl_lock); + spin_lock_bh(&ip6_sk_fl_lock); sfl->fl = fl; sfl->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl; - write_unlock_bh(&ip6_sk_fl_lock); + rcu_assign_pointer(np->ipv6_fl_list, sfl); + spin_unlock_bh(&ip6_sk_fl_lock); } int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) @@ -493,31 +518,33 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user 
*optval, int optlen) switch (freq.flr_action) { case IPV6_FL_A_PUT: - write_lock_bh(&ip6_sk_fl_lock); - for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) { + spin_lock_bh(&ip6_sk_fl_lock); + for (sflp = &np->ipv6_fl_list; + (sfl = rcu_dereference(*sflp))!=NULL; + sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = sfl->next; - write_unlock_bh(&ip6_sk_fl_lock); + *sflp = rcu_dereference(sfl->next); + spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); return 0; } } - write_unlock_bh(&ip6_sk_fl_lock); + spin_unlock_bh(&ip6_sk_fl_lock); return -ESRCH; case IPV6_FL_A_RENEW: - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return err; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (freq.flr_share == IPV6_FL_S_NONE && ns_capable(net->user_ns, CAP_NET_ADMIN)) { @@ -541,11 +568,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label) { err = -EEXIST; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_flags&IPV6_FL_F_EXCL) { - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); goto done; } fl1 = sfl->fl; @@ -553,7 +580,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) break; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (fl1 == NULL) fl1 = fl_lookup(net, freq.flr_label); @@ -641,13 +668,13 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); for 
(state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { - fl = fl_ht[state->bucket]; - - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - if (fl) - break; + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } } + fl = NULL; +out: return fl; } @@ -656,18 +683,22 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo struct ip6fl_iter_state *state = ip6fl_seq_private(seq); struct net *net = seq_file_net(seq); - fl = fl->next; + for_each_fl_continue_rcu(fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + try_again: - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - - while (!fl) { - if (++state->bucket <= FL_HASH_MASK) { - fl = fl_ht[state->bucket]; - goto try_again; - } else - break; + if (++state->bucket <= FL_HASH_MASK) { + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + goto try_again; } + fl = NULL; + +out: return fl; } @@ -681,9 +712,9 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) } static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(ip6_fl_lock) + __acquires(RCU) { - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); return *pos ? 
ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -700,9 +731,9 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip6fl_seq_stop(struct seq_file *seq, void *v) - __releases(ip6_fl_lock) + __releases(RCU) { - read_unlock_bh(&ip6_fl_lock); + rcu_read_unlock_bh(); } static int ip6fl_seq_show(struct seq_file *seq, void *v) @@ -775,15 +806,15 @@ static const struct file_operations ip6fl_seq_fops = { static int __net_init ip6_flowlabel_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "ip6_flowlabel", - S_IRUGO, &ip6fl_seq_fops)) + if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net, + &ip6fl_seq_fops)) return -ENOMEM; return 0; } static void __net_exit ip6_flowlabel_proc_fini(struct net *net) { - proc_net_remove(net, "ip6_flowlabel"); + remove_proc_entry("ip6_flowlabel", net->proc_net); } #else static inline int ip6_flowlabel_proc_init(struct net *net) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 867466c..e4efffe 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -758,8 +758,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_dst_set_noref(skb, dst); } - skb->transport_header = skb->network_header; - proto = NEXTHDR_GRE; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); @@ -768,14 +766,13 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_push(skb, gre_hlen); skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(*ipv6h)); /* * Push down and install the IP header. 
*/ ipv6h = ipv6_hdr(skb); - *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000); - dsfield = INET_ECN_encapsulate(0, dsfield); - ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -961,7 +958,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, int ret; if (!ip6_tnl_xmit_ctl(t)) - return -1; + goto tx_err; switch (skb->protocol) { case htons(ETH_P_IP): @@ -1241,7 +1238,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000); + ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a52d864..2bab2aa 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -118,6 +118,27 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* RFC4291 Errata ID: 3480 + * Interface-Local scope spans only a single interface on a + * node and is useful only for loopback transmission of + * multicast. Packets with interface-local scope received + * from another node must be discarded. + */ + if (!(skb->pkt_type == PACKET_LOOPBACK || + dev->flags & IFF_LOOPBACK) && + ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) + goto err; + + /* RFC4291 2.7 + * Nodes must not originate a packet to a multicast address whose scope + * field contains the reserved value 0; if such a packet is received, it + * must be silently dropped. 
+ */ + if (ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0) + goto err; + /* * RFC4291 2.7 * Multicast addresses must not be used as source addresses in IPv6 @@ -212,7 +233,7 @@ resubmit: if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, &hdr->saddr) && - !ipv6_is_mld(skb, nexthdr)) + !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) goto discard; } if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && @@ -232,9 +253,11 @@ resubmit: icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); } - } else + kfree_skb(skb); + } else { IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); + consume_skb(skb); + } } rcu_read_unlock(); return 0; @@ -270,7 +293,8 @@ int ip6_mc_input(struct sk_buff *skb) * IPv6 multicast router mode is now supported ;) */ if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && - !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&hdr->daddr) & + (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate @@ -280,10 +304,8 @@ int ip6_mc_input(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); /* Check for MLD */ - if (unlikely(opt->ra)) { + if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { /* Check if this is a mld message */ - u8 *ptr = skb_network_header(skb) + opt->ra; - struct icmp6hdr *icmp6; u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; @@ -291,7 +313,7 @@ int ip6_mc_input(struct sk_buff *skb) /* Check if the value of Router Alert * is for MLD (0x0000). 
*/ - if ((ptr[2] | ptr[3]) == 0) { + if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) { deliver = false; if (!ipv6_ext_hdr(nexthdr)) { @@ -303,24 +325,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (nexthdr != IPPROTO_ICMPV6) - goto out; - - if (!pskb_may_pull(skb, (skb_network_header(skb) + - offset + 1 - skb->data))) + if (!ipv6_is_mld(skb, nexthdr, offset)) goto out; - icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); - - switch (icmp6->icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - deliver = true; - break; - } - goto out; + deliver = true; } /* unknown RA - process it normally */ } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da..8234c1d 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5552d13..155eccf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,8 +56,6 @@ #include <net/checksum.h> #include <linux/mroute6.h> -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); - int __ip6_local_out(struct sk_buff *skb) { int len; @@ -88,7 +86,8 @@ static int ip6_finish_output2(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct neighbour *neigh; - struct rt6_info *rt; + struct in6_addr *nexthop; + int ret; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -121,12 +120,26 @@ static int ip6_finish_output2(struct sk_buff *skb) IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, skb->len); + + if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= + IPV6_ADDR_SCOPE_NODELOCAL && + !(dev->flags & IFF_LOOPBACK)) { + kfree_skb(skb); + return 0; + } } - rt = (struct rt6_info *) dst; - neigh = 
rt->n; - if (neigh) - return dst_neigh_output(dst, neigh, skb); + rcu_read_lock_bh(); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + if (!IS_ERR(neigh)) { + ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; + } + rcu_read_unlock_bh(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -216,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; + ip6_flow_hdr(hdr, tclass, fl6->flowlabel); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -236,9 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, dst->dev, dst_output); } - net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ipv6_local_error(sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -246,39 +258,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, EXPORT_SYMBOL(ip6_xmit); -/* - * To avoid extra problems ND packets are send through this - * routine. 
It's code duplication but I really want to avoid - * extra checks since ipv6_build_header is used by TCP (which - * is for us performance critical) - */ - -int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, - const struct in6_addr *saddr, const struct in6_addr *daddr, - int proto, int len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6hdr *hdr; - - skb->protocol = htons(ETH_P_IPV6); - skb->dev = dev; - - skb_reset_network_header(skb); - skb_put(skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(skb); - - *(__be32*)hdr = htonl(0x60000000); - - hdr->payload_len = htons(len); - hdr->nexthdr = proto; - hdr->hop_limit = np->hop_limit; - - hdr->saddr = *saddr; - hdr->daddr = *daddr; - - return 0; -} - static int ip6_call_ra_chain(struct sk_buff *skb, int sel) { struct ip6_ra_chain *ra; @@ -913,8 +892,12 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry of the nexthop router */ rt = (struct rt6_info *) *dst; - n = rt->n; - if (n && !(n->nud_state & NUD_VALID)) { + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; + rcu_read_unlock_bh(); + + if (err) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; @@ -1213,10 +1196,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (dst_allfrag(rt->dst.path)) cork->flags |= IPCORK_ALLFRAG; cork->length = 0; - exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; + exthdrlen = (opt ? 
opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; - dst_exthdrlen = rt->dst.header_len; + dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } else { rt = (struct rt6_info *)cork->dst; fl6 = &inet->cork.fl.u.ip6; @@ -1548,9 +1531,7 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - *(__be32*)hdr = fl6->flowlabel | - htonl(0x60000000 | ((int)np->cork.tclass << 20)); - + ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a14f28b..fff83cb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1030,9 +1030,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000); - dsfield = INET_ECN_encapsulate(0, dsfield); - ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 26dcdec..96bfb4e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1017,6 +1017,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, + mifi_t mifi) +{ + int line = MFC6_HASH(&in6addr_any, &in6addr_any); + struct mfc6_cache *c; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp) && + (c->mfc_un.res.ttls[mifi] < 255)) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, + struct in6_addr *mcastgrp, + mifi_t 
mifi) +{ + int line = MFC6_HASH(mcastgrp, &in6addr_any); + struct mfc6_cache *c, *proxy; + + if (ipv6_addr_any(mcastgrp)) + goto skip; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { + if (c->mfc_un.res.ttls[mifi] < 255) + return c; + + /* It's ok if the mifi is part of the static tree */ + proxy = ip6mr_cache_find_any_parent(mrt, + c->mf6c_parent); + if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) + return c; + } + +skip: + return ip6mr_cache_find_any_parent(mrt, mifi); +} + /* * Allocate a multicast cache entry */ @@ -1247,7 +1291,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, + int parent) { int line; struct mfc6_cache *c, *next; @@ -1256,7 +1301,9 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == c->mf6c_parent)) { write_lock_bh(&mrt_lock); list_del(&c->list); write_unlock_bh(&mrt_lock); @@ -1312,9 +1359,9 @@ static int __net_init ip6mr_net_init(struct net *net) #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) + if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops)) goto proc_vif_fail; - if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) + if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops)) goto proc_cache_fail; #endif @@ -1322,7 +1369,7 @@ static int __net_init ip6mr_net_init(struct net *net) 
#ifdef CONFIG_PROC_FS proc_cache_fail: - proc_net_remove(net, "ip6_mr_vif"); + remove_proc_entry("ip6_mr_vif", net->proc_net); proc_vif_fail: ip6mr_rules_exit(net); #endif @@ -1333,8 +1380,8 @@ fail: static void __net_exit ip6mr_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ip6_mr_cache"); - proc_net_remove(net, "ip6_mr_vif"); + remove_proc_entry("ip6_mr_cache", net->proc_net); + remove_proc_entry("ip6_mr_vif", net->proc_net); #endif ip6mr_rules_exit(net); } @@ -1391,7 +1438,7 @@ void ip6_mr_cleanup(void) } static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, - struct mf6cctl *mfc, int mrtsock) + struct mf6cctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1413,7 +1460,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == mfc->mf6cc_parent)) { found = true; break; } @@ -1430,7 +1479,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return 0; } - if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) + if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && + !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; c = ip6mr_cache_alloc(); @@ -1596,7 +1646,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; @@ -1653,15 +1703,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns */ case MRT6_ADD_MFC: case MRT6_DEL_MFC: + parent = -1; + case MRT6_ADD_MFC_PROXY: + case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) 
return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mf6cc_parent; rtnl_lock(); - if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(mrt, &mfc); + if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) + ret = ip6mr_mfc_delete(mrt, &mfc, parent); else - ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); + ret = ip6mr_mfc_add(net, mrt, &mfc, + sk == mrt->mroute6_sk, parent); rtnl_unlock(); return ret; @@ -1710,6 +1766,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns return -EINVAL; if (get_user(v, (u32 __user *)optval)) return -EFAULT; + /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (v != RT_TABLE_DEFAULT && v >= 100000000) + return -EINVAL; if (sk == mrt->mroute6_sk) return -EBUSY; @@ -2015,19 +2074,29 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ip6mr_find_vif(mrt, skb->dev); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { + struct mfc6_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of the static tree. + */ + cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. 
*/ if (mrt->vif6_table[vif].dev != skb->dev) { - int true_vifi; - cache->mfc_un.res.wrong_if++; - true_vifi = ip6mr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2045,14 +2114,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, goto dont_forward; } +forward: mrt->vif6_table[vif].pkt_in++; mrt->vif6_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (ipv6_addr_any(&cache->mf6c_origin) && + ipv6_addr_any(&cache->mf6c_mcastgrp)) { + if (true_vifi >= 0 && + true_vifi != cache->mf6c_parent && + ipv6_hdr(skb)->hop_limit > + cache->mfc_un.res.ttls[cache->mf6c_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mf6c_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) && + ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) @@ -2061,6 +2148,7 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { ip6mr_forward2(net, mrt, skb, cache, psend); return 0; @@ -2096,6 +2184,14 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, + &ipv6_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2183,6 +2279,13 @@ int ip6mr_get_route(struct net *net, read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); + 
if (!cache && skb->dev) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, + vif); + } if (!cache) { struct sk_buff *skb2; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ee94d31..d1e2e8e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -476,8 +476,8 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, - &junk); + retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, + &junk, &junk); if (retv) goto done; update: @@ -1002,7 +1002,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, release_sock(sk); if (skb) { - int err = datagram_recv_ctl(sk, &msg, skb); + int err = ip6_datagram_recv_ctl(sk, &msg, skb); kfree_skb(skb); if (err) return err; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 28dfa5f..bfa6cc3 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -376,8 +376,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, goto done; /* err = -EADDRNOTAVAIL */ rv = !0; for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, - sizeof(struct in6_addr)); + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) break; } @@ -427,12 +426,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr)); - if (rv == 0) - break; + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); + if (rv == 0) /* There is an error in the address. 
*/ + goto done; } - if (rv == 0) /* address already there is an error */ - goto done; for (j=psl->sl_count-1; j>=i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = *source; @@ -664,6 +661,10 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; @@ -690,6 +691,10 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; @@ -935,33 +940,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) } /* - * identify MLD packets for MLD filter exceptions - */ -bool ipv6_is_mld(struct sk_buff *skb, int nexthdr) -{ - struct icmp6hdr *pic; - - if (nexthdr != IPPROTO_ICMPV6) - return false; - - if (!pskb_may_pull(skb, sizeof(struct icmp6hdr))) - return false; - - pic = icmp6_hdr(skb); - - switch (pic->icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - return true; - default: - break; - } - return false; -} - -/* * check if the interface/address pair is valid */ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, @@ -1340,6 +1318,31 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) return scount; } +static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int proto, int len) +{ + struct ipv6hdr *hdr; + + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + + skb_reset_network_header(skb); + skb_put(skb, sizeof(struct ipv6hdr)); + hdr = ipv6_hdr(skb); + + 
ip6_flow_hdr(hdr, 0, 0); + + hdr->payload_len = htons(len); + hdr->nexthdr = proto; + hdr->hop_limit = inet6_sk(sk)->hop_limit; + + hdr->saddr = *saddr; + hdr->daddr = *daddr; +} + static struct sk_buff *mld_newpack(struct net_device *dev, int size) { struct net *net = dev_net(dev); @@ -1375,7 +1378,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) } else saddr = &addr_buf; - ip6_nd_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); + ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); @@ -1418,7 +1421,7 @@ static void mld_sendpack(struct sk_buff *skb) icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); + dst = icmp6_dst_alloc(skb->dev, &fl6); err = 0; if (IS_ERR(dst)) { @@ -1767,7 +1770,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } else saddr = &addr_buf; - ip6_nd_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); + ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); @@ -1786,7 +1789,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); + dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err_out; @@ -2596,10 +2599,10 @@ static int __net_init igmp6_proc_init(struct net *net) int err; err = -ENOMEM; - if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops)) + if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops)) goto out; - if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO, - &igmp6_mcf_seq_fops)) + if (!proc_create("mcfilter6", S_IRUGO, net->proc_net, + &igmp6_mcf_seq_fops)) goto out_proc_net_igmp6; 
err = 0; @@ -2607,14 +2610,14 @@ out: return err; out_proc_net_igmp6: - proc_net_remove(net, "igmp6"); + remove_proc_entry("igmp6", net->proc_net); goto out; } static void __net_exit igmp6_proc_exit(struct net *net) { - proc_net_remove(net, "mcfilter6"); - proc_net_remove(net, "igmp6"); + remove_proc_entry("mcfilter6", net->proc_net); + remove_proc_entry("igmp6", net->proc_net); } #else static inline int igmp6_proc_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6574175..76ef435 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -143,16 +143,12 @@ struct neigh_table nd_tbl = { .gc_thresh3 = 1024, }; -static inline int ndisc_opt_addr_space(struct net_device *dev) +static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) { - return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); -} - -static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, - unsigned short addr_type) -{ - int pad = ndisc_addr_option_pad(addr_type); - int space = NDISC_OPT_SPACE(data_len + pad); + int pad = ndisc_addr_option_pad(skb->dev->type); + int data_len = skb->dev->addr_len; + int space = ndisc_opt_addr_space(skb->dev); + u8 *opt = skb_put(skb, space); opt[0] = type; opt[1] = space>>3; @@ -166,7 +162,6 @@ static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, opt += data_len; if ((space -= data_len) > 0) memset(opt, 0, space); - return opt + space; } static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, @@ -370,91 +365,88 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } -static struct sk_buff *ndisc_build_skb(struct net_device *dev, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, - const struct in6_addr *target, - int llinfo) +static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, + int len) { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.ndisc_sk; - 
struct sk_buff *skb; - struct icmp6hdr *hdr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - int len; + struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; + struct sk_buff *skb; int err; - u8 *opt; - - if (!dev->addr_len) - llinfo = 0; - - len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0); - if (llinfo) - len += ndisc_opt_addr_space(dev); skb = sock_alloc_send_skb(sk, - (MAX_HEADER + sizeof(struct ipv6hdr) + - len + hlen + tlen), + hlen + sizeof(struct ipv6hdr) + len + tlen, 1, &err); if (!skb) { - ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n", + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", __func__, err); return NULL; } - skb_reserve(skb, hlen); - ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; - skb->transport_header = skb->tail; - skb_put(skb, len); + skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); - hdr = (struct icmp6hdr *)skb_transport_header(skb); - memcpy(hdr, icmp6h, sizeof(*hdr)); + return skb; +} - opt = skb_transport_header(skb) + sizeof(struct icmp6hdr); - if (target) { - *(struct in6_addr *)opt = *target; - opt += sizeof(*target); - } +static void ip6_nd_hdr(struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int hop_limit, int len) +{ + struct ipv6hdr *hdr; - if (llinfo) - ndisc_fill_addr_option(opt, llinfo, dev->dev_addr, - dev->addr_len, dev->type); + skb_push(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + hdr = ipv6_hdr(skb); - hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, - IPPROTO_ICMPV6, - csum_partial(hdr, - len, 0)); + ip6_flow_hdr(hdr, 0, 0); - return skb; + hdr->payload_len = htons(len); + hdr->nexthdr = IPPROTO_ICMPV6; + hdr->hop_limit = hop_limit; + + hdr->saddr = *saddr; + hdr->daddr = *daddr; } -static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, - struct neighbour *neigh, +static void 
ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h) + const struct in6_addr *saddr) { - struct flowi6 fl6; - struct dst_entry *dst; - struct net *net = dev_net(dev); + struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(skb->dev); struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; int err; + struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; - icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex); - dst = icmp6_dst_alloc(dev, neigh, &fl6); - if (IS_ERR(dst)) { - kfree_skb(skb); - return; + if (!dst) { + struct sock *sk = net->ipv6.ndisc_sk; + struct flowi6 fl6; + + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); + dst = icmp6_dst_alloc(skb->dev, &fl6); + if (IS_ERR(dst)) { + kfree_skb(skb); + return; + } + + skb_dst_set(skb, dst); } - skb_dst_set(skb, dst); + icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len, + IPPROTO_ICMPV6, + csum_partial(icmp6h, + skb->len, 0)); + + ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); @@ -470,36 +462,17 @@ static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, rcu_read_unlock(); } -/* - * Send a Neighbour Discover packet - */ -static void __ndisc_send(struct net_device *dev, - struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, const struct in6_addr *target, - int llinfo) -{ - struct sk_buff *skb; - - skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo); - if (!skb) - return; - - ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h); -} - static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, - int router, int solicited, int override, int inc_opt) + bool router, bool solicited, bool override, bool inc_opt) { + struct sk_buff 
*skb; struct in6_addr tmpaddr; struct inet6_ifaddr *ifp; const struct in6_addr *src_addr; - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, - }; + struct nd_msg *msg; + int optlen = 0; /* for anycast or proxy, solicited_addr != src_addr */ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); @@ -517,13 +490,32 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, src_addr = &tmpaddr; } - icmp6h.icmp6_router = router; - icmp6h.icmp6_solicited = solicited; - icmp6h.icmp6_override = override; + if (!dev->addr_len) + inc_opt = 0; + if (inc_opt) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; - __ndisc_send(dev, neigh, daddr, src_addr, - &icmp6h, solicited_addr, - inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); + msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct nd_msg) { + .icmph = { + .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, + .icmp6_router = router, + .icmp6_solicited = solicited, + .icmp6_override = override, + }, + .target = *solicited_addr, + }; + + if (inc_opt) + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, + dev->dev_addr); + + + ndisc_send_skb(skb, daddr, src_addr); } static void ndisc_send_unsol_na(struct net_device *dev) @@ -551,10 +543,11 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct sk_buff *skb; struct in6_addr addr_buf; - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, - }; + int inc_opt = dev->addr_len; + int optlen = 0; + struct nd_msg *msg; if (saddr == NULL) { if (ipv6_get_lladdr(dev, &addr_buf, @@ -563,18 +556,37 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, saddr = &addr_buf; } - __ndisc_send(dev, neigh, daddr, saddr, - &icmp6h, solicit, - !ipv6_addr_any(saddr) ? 
ND_OPT_SOURCE_LL_ADDR : 0); + if (ipv6_addr_any(saddr)) + inc_opt = 0; + if (inc_opt) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; + + msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct nd_msg) { + .icmph = { + .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, + }, + .target = *solicit, + }; + + if (inc_opt) + ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, + dev->dev_addr); + + ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr) { - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_ROUTER_SOLICITATION, - }; + struct sk_buff *skb; + struct rs_msg *msg; int send_sllao = dev->addr_len; + int optlen = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* @@ -598,9 +610,27 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, } } #endif - __ndisc_send(dev, NULL, daddr, saddr, - &icmp6h, NULL, - send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0); + if (!dev->addr_len) + send_sllao = 0; + if (send_sllao) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; + + msg = (struct rs_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct rs_msg) { + .icmph = { + .icmp6_type = NDISC_ROUTER_SOLICITATION, + }, + }; + + if (send_sllao) + ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, + dev->dev_addr); + + ndisc_send_skb(skb, daddr, saddr); } @@ -676,6 +706,11 @@ static void ndisc_recv_ns(struct sk_buff *skb) bool inc; int is_router = -1; + if (skb->len < sizeof(struct nd_msg)) { + ND_PRINTK(2, warn, "NS: packet too short\n"); + return; + } + if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NS: multicast target address\n"); return; @@ -685,11 +720,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) * RFC2461 7.1.1: * DAD has to be destined for solicited node multicast address. 
*/ - if (dad && - !(daddr->s6_addr32[0] == htonl(0xff020000) && - daddr->s6_addr32[1] == htonl(0x00000000) && - daddr->s6_addr32[2] == htonl(0x00000001) && - daddr->s6_addr [12] == 0xff )) { + if (dad && !ipv6_addr_is_solict_mult(daddr)) { ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); return; } @@ -780,11 +811,11 @@ static void ndisc_recv_ns(struct sk_buff *skb) } if (is_router < 0) - is_router = !!idev->cnf.forwarding; + is_router = idev->cnf.forwarding; if (dad) { ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target, - is_router, 0, (ifp != NULL), 1); + !!is_router, false, (ifp != NULL), true); goto out; } @@ -805,8 +836,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->header_ops) { ndisc_send_na(dev, neigh, saddr, &msg->target, - is_router, - 1, (ifp != NULL && inc), inc); + !!is_router, + true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); } @@ -1350,25 +1381,34 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } +static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, + struct sk_buff *orig_skb, + int rd_len) +{ + u8 *opt = skb_put(skb, rd_len); + + memset(opt, 0, 8); + *(opt++) = ND_OPT_REDIRECT_HDR; + *(opt++) = (rd_len >> 3); + opt += 6; + + memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8); +} + void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; - int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + int optlen = 0; struct inet_peer *peer; struct sk_buff *buff; - struct icmp6hdr *icmph; + struct rd_msg *msg; struct in6_addr saddr_buf; - struct in6_addr *addrp; struct rt6_info *rt; struct dst_entry *dst; - struct inet6_dev *idev; struct flowi6 fl6; - u8 *opt; - int hlen, tlen; int rd_len; - int err; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; bool ret; @@ -1424,7 +1464,7 @@ void 
ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; - len += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev); } else read_unlock_bh(&neigh->lock); @@ -1432,80 +1472,40 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } rd_len = min_t(unsigned int, - IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8); + IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen, + skb->len + 8); rd_len &= ~0x7; - len += rd_len; - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - buff = sock_alloc_send_skb(sk, - (MAX_HEADER + sizeof(struct ipv6hdr) + - len + hlen + tlen), - 1, &err); - if (buff == NULL) { - ND_PRINTK(0, err, - "Redirect: %s failed to allocate an skb, err=%d\n", - __func__, err); - goto release; - } - - skb_reserve(buff, hlen); - ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, - IPPROTO_ICMPV6, len); - - skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data); - skb_put(buff, len); - icmph = icmp6_hdr(buff); - - memset(icmph, 0, sizeof(struct icmp6hdr)); - icmph->icmp6_type = NDISC_REDIRECT; + optlen += rd_len; - /* - * copy target and destination addresses - */ - - addrp = (struct in6_addr *)(icmph + 1); - *addrp = *target; - addrp++; - *addrp = ipv6_hdr(skb)->daddr; + buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!buff) + goto release; - opt = (u8*) (addrp + 1); + msg = (struct rd_msg *)skb_put(buff, sizeof(*msg)); + *msg = (struct rd_msg) { + .icmph = { + .icmp6_type = NDISC_REDIRECT, + }, + .target = *target, + .dest = ipv6_hdr(skb)->daddr, + }; /* * include target_address option */ if (ha) - opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha, - dev->addr_len, dev->type); + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha); /* * build redirect option and copy skb over to the new packet. 
*/ - memset(opt, 0, 8); - *(opt++) = ND_OPT_REDIRECT_HDR; - *(opt++) = (rd_len >> 3); - opt += 6; - - memcpy(opt, ipv6_hdr(skb), rd_len - 8); - - icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, - len, IPPROTO_ICMPV6, - csum_partial(icmph, len, 0)); + if (rd_len) + ndisc_fill_redirect_hdr_option(buff, skb, rd_len); skb_dst_set(buff, dst); - rcu_read_lock(); - idev = __in6_dev_get(dst->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, - dst_output); - if (!err) { - ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT); - ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - } - - rcu_read_unlock(); + ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf); return; release: @@ -1522,7 +1522,7 @@ int ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; - if (!pskb_may_pull(skb, skb->len)) + if (skb_linearize(skb)) return 0; msg = (struct nd_msg *)skb_transport_header(skb); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 125a90d..341b54a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1098,7 +1098,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1157,7 +1157,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, } t = xt_find_table_lock(net, AF_INET6, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) @@ -1197,7 +1197,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", 
name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1355,7 +1355,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, } t = xt_find_table_lock(net, AF_INET6, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } @@ -1939,7 +1939,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index e948691..cb63114 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -9,47 +9,38 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> +#include <net/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_NPT.h> #include <linux/netfilter/x_tables.h> -static __sum16 csum16_complement(__sum16 a) -{ - return (__force __sum16)(0xffff - (__force u16)a); -} - -static __sum16 csum16_add(__sum16 a, __sum16 b) -{ - u16 sum; - - sum = (__force u16)a + (__force u16)b; - sum += (__force u16)a < (__force u16)b; - return (__force __sum16)sum; -} - -static __sum16 csum16_sub(__sum16 a, __sum16 b) -{ - return csum16_add(a, csum16_complement(b)); -} - static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; - __sum16 src_sum = 0, dst_sum = 0; + __wsum src_sum = 0, dst_sum = 0; + struct in6_addr pfx; unsigned int i; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; + /* Ensure that LSB of prefix is zero */ + ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len); + if (!ipv6_addr_equal(&pfx, 
&npt->src_pfx.in6)) + return -EINVAL; + ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len); + if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { - src_sum = csum16_add(src_sum, - (__force __sum16)npt->src_pfx.in6.s6_addr16[i]); - dst_sum = csum16_add(dst_sum, - (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]); + src_sum = csum_add(src_sum, + (__force __wsum)npt->src_pfx.in6.s6_addr16[i]); + dst_sum = csum_add(dst_sum, + (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]); } - npt->adjustment = csum16_sub(src_sum, dst_sum); + npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; } @@ -66,11 +57,11 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, if (pfx_len - i >= 32) mask = 0; else - mask = htonl(~((1 << (pfx_len - i)) - 1)); + mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; - addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx]; + addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx]; } if (pfx_len <= 48) @@ -85,8 +76,8 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, return false; } - sum = csum16_add((__force __sum16)addr->s6_addr16[idx], - npt->adjustment); + sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]), + csum_unfold(npt->adjustment))); if (sum == CSUM_MANGLED_0) sum = 0; *(__force __sum16 *)&addr->s6_addr16[idx] = sum; @@ -123,6 +114,7 @@ ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", + .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, @@ -133,6 +125,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { }, { .name = "DNPT", + .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, diff 
--git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index fd4fb34..ed3b427 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -126,12 +126,13 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); - *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); + ip6_flow_hdr(ip6h, tclass, 0); ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; ip6h->saddr = oip6h->daddr; ip6h->daddr = oip6h->saddr; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 5060d54..e0983f3 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -71,6 +71,12 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, return ret; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rt6_info *rt = (const void *) skb_dst(skb); + return rt && (rt->rt6i_flags & RTF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; @@ -78,7 +84,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ipv6_hdr(skb); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 7431121..6134a1e 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> +#include <net/ipv6.h> MODULE_LICENSE("GPL"); 
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -60,8 +61,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) dev_net(out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && - (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || - memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || + !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 6c8ae24..e0e788d 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -127,23 +127,28 @@ nf_nat_ipv6_fn(unsigned int hooknum, ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; - } else + } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; + } break; default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_DROP; - } + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; } return nf_nat_packet(ct, ctinfo, hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; } static unsigned int diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 00ee17c..2b6c226 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -81,8 +81,8 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); /* - * (protoff == skb->len) mean that the packet doesn't have 
no data - * except of IPv6 & ext headers. but it's tracked anyway. - YK + * (protoff == skb->len) means the packet has not data, just + * IPv6 and possibly extensions headers, but it is tracked anyway */ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); @@ -104,7 +104,6 @@ static unsigned int ipv6_helper(unsigned int hooknum, const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; - unsigned int ret; __be16 frag_off; int protoff; u8 nexthdr; @@ -130,12 +129,7 @@ static unsigned int ipv6_helper(unsigned int hooknum, return NF_ACCEPT; } - ret = helper->help(skb, protoff, ct, ctinfo); - if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { - nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, - "nf_ct_%s: dropping packet", helper->name); - } - return ret; + return helper->help(skb, protoff, ct, ctinfo); } static unsigned int ipv6_confirm(unsigned int hooknum, @@ -421,54 +415,43 @@ static int ipv6_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_tcp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); + pr_err("nf_conntrack_tcp6: pernet registration failed\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); + pr_err("nf_conntrack_udp6: pernet registration failed\n"); goto cleanup_tcp6; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_icmpv6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); + pr_err("nf_conntrack_icmp6: pernet registration 
failed\n"); goto cleanup_udp6; } - ret = nf_conntrack_l3proto_register(net, - &nf_conntrack_l3proto_ipv6); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); + pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); goto cleanup_icmpv6; } return 0; cleanup_icmpv6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); cleanup_udp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); cleanup_tcp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); out: return ret; } static void ipv6_net_exit(struct net *net) { - nf_conntrack_l3proto_unregister(net, - &nf_conntrack_l3proto_ipv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); } static struct pernet_operations ipv6_net_ops = { @@ -491,19 +474,52 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) ret = register_pernet_subsys(&ipv6_net_ops); if (ret < 0) - goto cleanup_pernet; + goto cleanup_sockopt; + ret = nf_register_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " "hook.\n"); - goto cleanup_ipv6; + goto cleanup_pernet; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6); + if (ret < 0) { + 
pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n"); + goto cleanup_tcp6; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n"); + goto cleanup_udp6; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); + goto cleanup_icmpv6; } return ret; - cleanup_ipv6: - unregister_pernet_subsys(&ipv6_net_ops); + cleanup_icmpv6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + cleanup_udp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + cleanup_tcp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + cleanup_hooks: + nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); cleanup_pernet: + unregister_pernet_subsys(&ipv6_net_ops); + cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst6); return ret; } @@ -511,6 +527,10 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 22c8ea9..6700069 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -14,6 +14,8 @@ * 2 of the License, or (at your option) any later version. 
*/ +#define pr_fmt(fmt) "IPv6-nf: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -97,9 +99,9 @@ static int nf_ct_frag6_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].data = &net->ipv6.frags.high_thresh; - table[1].data = &net->ipv6.frags.low_thresh; - table[2].data = &net->ipv6.frags.timeout; + table[0].data = &net->nf_frag.frags.timeout; + table[1].data = &net->nf_frag.frags.low_thresh; + table[2].data = &net->nf_frag.frags.high_thresh; } hdr = register_net_sysctl(net, "net/netfilter", table); @@ -180,13 +182,11 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); - if (q == NULL) - goto oom; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct frag_queue, q); - -oom: - return NULL; } @@ -311,12 +311,15 @@ found: else fq->q.fragments = skb; - skb->dev = NULL; + if (skb->dev) { + fq->iif = skb->dev->ifindex; + skb->dev = NULL; + } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - atomic_add(skb->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -325,9 +328,8 @@ found: fq->nhoffset = nhoff; fq->q.last_in |= INET_FRAG_FIRST_IN; } - write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&nf_frags.lock); + + inet_frag_lru_move(&fq->q); return 0; discard_fq: @@ -366,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } /* Head of list must not be cloned. 
*/ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + if (skb_unclone(head, GFP_ATOMIC)) { pr_debug("skb is cloned but can't expand head"); goto out_oom; } @@ -395,7 +397,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -419,7 +421,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, head->truesize); head->local_df = 1; head->next = NULL; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 745a320..bbbe53a 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -295,11 +295,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev) static int __net_init ipv6_proc_init_net(struct net *net) { - if (!proc_net_fops_create(net, "sockstat6", S_IRUGO, - &sockstat6_seq_fops)) + if (!proc_create("sockstat6", S_IRUGO, net->proc_net, + &sockstat6_seq_fops)) return -ENOMEM; - if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops)) + if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops)) goto proc_snmp6_fail; net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); @@ -308,17 +308,17 @@ static int __net_init ipv6_proc_init_net(struct net *net) return 0; proc_dev_snmp6_fail: - proc_net_remove(net, "snmp6"); + remove_proc_entry("snmp6", net->proc_net); proc_snmp6_fail: - proc_net_remove(net, "sockstat6"); + remove_proc_entry("sockstat6", net->proc_net); return -ENOMEM; } static void __net_exit ipv6_proc_exit_net(struct net *net) { - proc_net_remove(net, "sockstat6"); - proc_net_remove(net, "dev_snmp6"); - proc_net_remove(net, "snmp6"); + remove_proc_entry("sockstat6", net->proc_net); + 
remove_proc_entry("dev_snmp6", net->proc_net); + remove_proc_entry("snmp6", net->proc_net); } static struct pernet_operations ipv6_proc_ops = { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6cd29b1..330b5e7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -71,10 +71,9 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif) { - struct hlist_node *node; bool is_multicast = ipv6_addr_is_multicast(loc_addr); - sk_for_each_from(sk, node) + sk_for_each_from(sk) if (inet_sk(sk)->inet_num == num) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -507,7 +506,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sock_recv_ts_and_drops(msg, sk, skb); if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_ctl(sk, msg, skb); err = copied; if (flags & MSG_TRUNC) @@ -822,8 +821,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1292,7 +1291,7 @@ static const struct file_operations raw6_seq_fops = { static int __net_init raw6_init_net(struct net *net) { - if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops)) return -ENOMEM; return 0; @@ -1300,7 +1299,7 @@ static int __net_init raw6_init_net(struct net *net) static void __net_exit raw6_exit_net(struct net *net) { - proc_net_remove(net, "raw6"); + remove_proc_entry("raw6", net->proc_net); } static struct pernet_operations raw6_net_ops = { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e5253ec..0ba10e5 100644 --- a/net/ipv6/reassembly.c +++ 
b/net/ipv6/reassembly.c @@ -26,6 +26,9 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ + +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -79,20 +82,8 @@ unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, { u32 c; - c = jhash_3words((__force u32)saddr->s6_addr32[0], - (__force u32)saddr->s6_addr32[1], - (__force u32)saddr->s6_addr32[2], - rnd); - - c = jhash_3words((__force u32)saddr->s6_addr32[3], - (__force u32)daddr->s6_addr32[0], - (__force u32)daddr->s6_addr32[1], - c); - - c = jhash_3words((__force u32)daddr->s6_addr32[2], - (__force u32)daddr->s6_addr32[3], - (__force u32)id, - c); + c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, rnd); return c & (INETFRAGS_HASHSZ - 1); } @@ -197,9 +188,10 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); - if (q == NULL) + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; - + } return container_of(q, struct frag_queue, q); } @@ -327,7 +319,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. 
@@ -338,12 +330,18 @@ found: } if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) - return ip6_frag_reasm(fq, prev, dev); + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + res = ip6_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; + } - write_lock(&ip6_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&ip6_frags.lock); + skb_dst_drop(skb); + inet_frag_lru_move(&fq->q); return -1; discard_fq: @@ -406,7 +404,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split @@ -429,7 +427,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -467,7 +465,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - atomic_sub(sum_truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e229a3b..e5fe004 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -145,25 +145,12 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, struct neighbour *n; daddr = choose_neigh_daddr(rt, skb, daddr); - n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); + n = __ipv6_neigh_lookup(dst->dev, daddr); if (n) return n; return neigh_create(&nd_tbl, daddr, dst->dev); } -static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) -{ - struct neighbour *n = 
__ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway); - if (!n) { - n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(n)) - return PTR_ERR(n); - } - rt->n = n; - - return 0; -} - static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -300,9 +287,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - - if (rt->n) - neigh_release(rt->n); + struct dst_entry *from = dst->from; if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -312,8 +297,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) - dst_release(dst->from); + dst->from = NULL; + dst_release(from); if (rt6_has_peer(rt)) { struct inet_peer *peer = rt6_peer_ptr(rt); @@ -354,11 +339,6 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, in6_dev_put(idev); } } - if (rt->n && rt->n->dev == dev) { - rt->n->dev = loopback_dev; - dev_hold(loopback_dev); - dev_put(dev); - } } } @@ -388,15 +368,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count, { unsigned int val = fl6->flowi6_proto; - val ^= (__force u32)fl6->daddr.s6_addr32[0]; - val ^= (__force u32)fl6->daddr.s6_addr32[1]; - val ^= (__force u32)fl6->daddr.s6_addr32[2]; - val ^= (__force u32)fl6->daddr.s6_addr32[3]; - - val ^= (__force u32)fl6->saddr.s6_addr32[0]; - val ^= (__force u32)fl6->saddr.s6_addr32[1]; - val ^= (__force u32)fl6->saddr.s6_addr32[2]; - val ^= (__force u32)fl6->saddr.s6_addr32[3]; + val ^= ipv6_addr_hash(&fl6->daddr); + val ^= ipv6_addr_hash(&fl6->saddr); /* Work only if this not encapsulated */ switch (fl6->flowi6_proto) { @@ -505,24 +478,34 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - neigh = rt ? 
rt->n : NULL; - if (!neigh || (neigh->nud_state & NUD_VALID)) + if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) return; - read_lock_bh(&neigh->lock); - if (!(neigh->nud_state & NUD_VALID) && + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + write_lock(&neigh->lock); + if (neigh->nud_state & NUD_VALID) + goto out; + } + + if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { struct in6_addr mcaddr; struct in6_addr *target; - neigh->updated = jiffies; - read_unlock_bh(&neigh->lock); + if (neigh) { + neigh->updated = jiffies; + write_unlock(&neigh->lock); + } - target = (struct in6_addr *)&neigh->primary_key; + target = (struct in6_addr *)&rt->rt6i_gateway; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); } else { - read_unlock_bh(&neigh->lock); +out: + write_unlock(&neigh->lock); } + rcu_read_unlock_bh(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -549,20 +532,24 @@ static inline bool rt6_check_neigh(struct rt6_info *rt) struct neighbour *neigh; bool ret = false; - neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - ret = true; - else if (neigh) { - read_lock_bh(&neigh->lock); + return true; + + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) ret = true; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) ret = true; #endif - read_unlock_bh(&neigh->lock); + read_unlock(&neigh->lock); } + rcu_read_unlock_bh(); + return ret; } @@ -838,8 +825,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt = ip6_rt_copy(ort, daddr); if (rt) { - int attempts = !in_softirq(); - if (!(rt->rt6i_flags & RTF_GATEWAY)) { if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) @@ -855,32 +840,6 @@ static struct rt6_info 
*rt6_alloc_cow(struct rt6_info *ort, rt->rt6i_src.plen = 128; } #endif - - retry: - if (rt6_bind_neighbour(rt, rt->dst.dev)) { - struct net *net = dev_net(rt->dst.dev); - int saved_rt_min_interval = - net->ipv6.sysctl.ip6_rt_gc_min_interval; - int saved_rt_elasticity = - net->ipv6.sysctl.ip6_rt_gc_elasticity; - - if (attempts-- > 0) { - net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; - net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; - - ip6_dst_gc(&net->ipv6.ip6_dst_ops); - - net->ipv6.sysctl.ip6_rt_gc_elasticity = - saved_rt_elasticity; - net->ipv6.sysctl.ip6_rt_gc_min_interval = - saved_rt_min_interval; - goto retry; - } - - net_warn_ratelimited("Neighbour table overflow\n"); - dst_free(&rt->dst); - return NULL; - } } return rt; @@ -891,10 +850,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, { struct rt6_info *rt = ip6_rt_copy(ort, daddr); - if (rt) { + if (rt) rt->rt6i_flags |= RTF_CACHE; - rt->n = neigh_clone(ort->n); - } return rt; } @@ -928,7 +885,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -994,7 +951,7 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, + .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, }; @@ -1054,7 +1011,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags; - rt6_clean_expires(rt); rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); @@ -1159,7 +1115,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; 
fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1187,7 +1143,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1247,7 +1203,6 @@ static struct dst_entry *icmp6_dst_gc_list; static DEFINE_SPINLOCK(icmp6_dst_lock); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, - struct neighbour *neigh, struct flowi6 *fl6) { struct dst_entry *dst; @@ -1265,20 +1220,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, goto out; } - if (neigh) - neigh_hold(neigh); - else { - neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); - if (IS_ERR(neigh)) { - in6_dev_put(idev); - dst_free(&rt->dst); - return ERR_CAST(neigh); - } - } - rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - rt->n = neigh; atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; @@ -1587,12 +1530,6 @@ int ip6_route_add(struct fib6_config *cfg) } else rt->rt6i_prefsrc.plen = 0; - if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - err = rt6_bind_neighbour(rt, dev); - if (err) - goto out; - } - rt->rt6i_flags = cfg->fc_flags; install_route: @@ -1705,37 +1642,32 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; - const struct in6_addr *target; struct ndisc_options ndopts; - const struct in6_addr *dest; - struct neighbour *old_neigh; struct inet6_dev *in6_dev; struct neighbour *neigh; - struct icmp6hdr *icmph; + struct rd_msg *msg; int optlen, on_link; u8 *lladdr; optlen = skb->tail - skb->transport_header; - optlen -= sizeof(struct 
icmp6hdr) + 2 * sizeof(struct in6_addr); + optlen -= sizeof(*msg); if (optlen < 0) { net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); return; } - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; + msg = (struct rd_msg *)icmp6_hdr(skb); - if (ipv6_addr_is_multicast(dest)) { + if (ipv6_addr_is_multicast(&msg->dest)) { net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); return; } on_link = 0; - if (ipv6_addr_equal(dest, target)) { + if (ipv6_addr_equal(&msg->dest, &msg->target)) { on_link = 1; - } else if (ipv6_addr_type(target) != + } else if (ipv6_addr_type(&msg->target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); return; @@ -1752,7 +1684,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * first-hop router for the specified ICMP Destination Address. */ - if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { + if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) { net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); return; } @@ -1779,15 +1711,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu */ dst_confirm(&rt->dst); - neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); + neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); if (!neigh) return; - /* Duplicate redirect: silently ignore. */ - old_neigh = rt->n; - if (neigh == old_neigh) - goto out; - /* * We have finally decided to accept it. 
*/ @@ -1799,7 +1726,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)) ); - nrt = ip6_rt_copy(rt, dest); + nrt = ip6_rt_copy(rt, &msg->dest); if (!nrt) goto out; @@ -1808,16 +1735,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - nrt->n = neigh_clone(neigh); if (ip6_ins_rt(nrt)) goto out; netevent.old = &rt->dst; - netevent.old_neigh = old_neigh; netevent.new = &nrt->dst; - netevent.new_neigh = neigh; - netevent.daddr = dest; + netevent.daddr = &msg->dest; + netevent.neigh = neigh; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags & RTF_CACHE) { @@ -1859,8 +1784,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == (RTF_DEFAULT | RTF_ADDRCONF)) rt6_set_from(rt, ort); - else - rt6_clean_expires(rt); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -1992,7 +1915,8 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); @@ -2123,7 +2047,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - int err; if (!rt) { net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); @@ -2142,11 +2065,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - err = rt6_bind_neighbour(rt, rt->dst.dev); - if (err) { - dst_free(&rt->dst); - return 
ERR_PTR(err); - } rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; @@ -2492,7 +2410,6 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; - struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2605,9 +2522,8 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - n = rt->n; - if (n) { - if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) + if (rt->rt6i_flags & RTF_GATEWAY) { + if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0) goto nla_put_failure; } @@ -2802,7 +2718,6 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - struct neighbour *n; seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2811,9 +2726,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - n = rt->n; - if (n) { - seq_printf(m, "%pi6", n->primary_key); + if (rt->rt6i_flags & RTF_GATEWAY) { + seq_printf(m, "%pi6", &rt->rt6i_gateway); } else { seq_puts(m, "00000000000000000000000000000000"); } @@ -3080,8 +2994,8 @@ static void __net_exit ip6_route_net_exit(struct net *net) static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); + proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); #endif return 0; } @@ -3089,8 +3003,8 @@ static int __net_init ip6_route_net_init_late(struct net *net) static void __net_exit ip6_route_net_exit_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); + remove_proc_entry("ipv6_route", net->proc_net); + 
remove_proc_entry("rt6_stats", net->proc_net); #endif } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cfba99b..02f96dc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -72,6 +72,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; @@ -590,17 +592,21 @@ out: return err; } +static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, + const struct in6_addr *v6addr) +{ + __be32 v4embed = 0; + if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed) + return true; + return false; +} + static int ipip6_rcv(struct sk_buff *skb) { - const struct iphdr *iph; + const struct iphdr *iph = ip_hdr(skb); struct ip_tunnel *tunnel; int err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - - iph = ip_hdr(skb); - tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -613,10 +619,19 @@ static int ipip6_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; - if ((tunnel->dev->priv_flags & IFF_ISATAP) && - !isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; + } + } else { + if (is_spoofed_6rd(tunnel, iph->saddr, + &ipv6_hdr(skb)->saddr) || + is_spoofed_6rd(tunnel, iph->daddr, + &ipv6_hdr(skb)->daddr)) { + tunnel->dev->stats.rx_errors++; + goto out; + } } __skb_tunnel_rx(skb, tunnel->dev); @@ -650,14 +665,12 @@ out: } /* - * Returns the embedded IPv4 address if the IPv6 address - * comes from 6rd / 6to4 (RFC 3056) 
addr space. + * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function + * stores the embedded IPv4 address in v4dst and returns true. */ -static inline -__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst) { - __be32 dst = 0; - #ifdef CONFIG_IPV6_SIT_6RD if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, tunnel->ip6rd.prefixlen)) { @@ -676,14 +689,24 @@ __be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> (32 - pbi1); - dst = tunnel->ip6rd.relay_prefix | htonl(d); + *v4dst = tunnel->ip6rd.relay_prefix | htonl(d); + return true; } #else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ - memcpy(&dst, &v6dst->s6_addr16[1], 4); + memcpy(v4dst, &v6dst->s6_addr16[1], 4); + return true; } #endif + return false; +} + +static inline __be32 try_6rd(struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + __be32 dst = 0; + check_6rd(tunnel, v6dst, &dst); return dst; } @@ -744,7 +767,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (!dst) - dst = try_6rd(&iph6->daddr, tunnel); + dst = try_6rd(tunnel, &iph6->daddr); if (!dst) { struct neighbour *neigh = NULL; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 4016197..8a0848b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -179,7 +179,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; ret = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93825dd..46a5be8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -386,9 +386,17 @@ static void tcp_v6_err(struct 
sk_buff *skb, struct inet6_skb_parm *opt, if (dst) dst->ops->redirect(dst, sk, skb); + goto out; } if (type == ICMPV6_PKT_TOOBIG) { + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); @@ -423,6 +431,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; case TCP_SYN_SENT: @@ -712,7 +721,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) + u32 tsval, u32 tsecr, + struct tcp_md5sig_key *key, int rst, u8 tclass) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -724,7 +734,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, struct dst_entry *dst; __be32 *topt; - if (ts) + if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG if (key) @@ -754,11 +764,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, topt = (__be32 *)(t1 + 1); - if (ts) { + if (tsecr) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *topt++ = htonl(tcp_time_stamp); - *topt++ = htonl(ts); + *topt++ = htonl(tsval); + *topt++ = htonl(tsecr); } #ifdef CONFIG_TCP_MD5SIG @@ -834,7 +844,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. 
*/ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, &ipv6h->daddr, + &tcp_hashinfo, &ipv6h->saddr, + th->source, &ipv6h->daddr, ntohs(th->source), inet6_iif(skb)); if (!sk1) return; @@ -858,7 +869,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -869,10 +880,11 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 tsval, u32 tsecr, struct tcp_md5sig_key *key, u8 tclass) { - tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -882,6 +894,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), tw->tw_tclass); @@ -891,7 +904,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, + tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); } @@ -958,8 +972,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk) && 
inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; + } req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) @@ -1027,7 +1043,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->rmt_addr = ipv6_hdr(skb)->saddr; treq->loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb); + TCP_ECN_create_request(req, skb, sock_net(sk)); treq->iif = sk->sk_bound_dev_if; @@ -1108,6 +1124,7 @@ drop_and_release: drop_and_free: reqsk_free(req); drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; /* don't send reset */ } @@ -1163,7 +1180,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1243,7 +1260,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1456,7 +1473,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); @@ -1598,6 +1615,7 @@ do_time_wait: struct sock *sk2; sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), 
inet6_iif(skb)); if (sk2 != NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index dfaa29b..d8e5e85 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,6 +45,7 @@ #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> +#include <net/inet6_hashtables.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -203,7 +204,8 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; @@ -214,8 +216,18 @@ begin: if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } else if (score == SCORE2_MAX) goto exact_match; + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -249,7 +261,8 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -284,6 +297,17 @@ begin: if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -443,7 +467,7 @@ try_again: ip_cmsg_recv(msg, skb); } else { if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_ctl(sk, msg, skb); } err = copied; @@ -752,40 +776,6 @@ static int __udp6_lib_mcast_deliver(struct net 
*net, struct sk_buff *skb, return 0; } -static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) -{ - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - } - - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); - return 1; - } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); - - return 0; -} - int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { @@ -1153,8 +1143,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1295,10 +1285,18 @@ do_confirm: void udpv6_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + inet6_destroy_sock(sk); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0c8934a..cf05cf0 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. 
*/ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9f2095b..9bf6a74 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -69,8 +69,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + err = skb_unclone(skb, GFP_ATOMIC); + if (err) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index c984413..4ef7bdb 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -110,7 +110,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ - xdst->u.rt6.n = neigh_clone(rt->n); xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; @@ -321,7 +320,51 @@ static struct ctl_table xfrm6_policy_table[] = { { } }; -static struct ctl_table_header *sysctl_hdr; +static int __net_init xfrm6_net_init(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = xfrm6_policy_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh; + } + + hdr = register_net_sysctl(net, "net/ipv6", table); + if (!hdr) + goto err_reg; + + net->ipv6.sysctl.xfrm6_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit xfrm6_net_exit(struct net *net) +{ + struct ctl_table *table; + + if 
(net->ipv6.sysctl.xfrm6_hdr == NULL) + return; + + table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations xfrm6_net_ops = { + .init = xfrm6_net_init, + .exit = xfrm6_net_exit, +}; #endif int __init xfrm6_init(void) @@ -340,8 +383,7 @@ int __init xfrm6_init(void) goto out_policy; #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl(&init_net, "net/ipv6", - xfrm6_policy_table); + register_pernet_subsys(&xfrm6_net_ops); #endif out: return ret; @@ -353,8 +395,7 @@ out_policy: void xfrm6_fini(void) { #ifdef CONFIG_SYSCTL - if (sysctl_hdr) - unregister_net_sysctl_table(sysctl_hdr); + unregister_pernet_subsys(&xfrm6_net_ops); #endif xfrm6_policy_fini(); xfrm6_state_fini(); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index ee5a706..de2bcfa 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -72,7 +72,7 @@ static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *ad { unsigned int h; - h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]); + h = ipv6_addr_hash((const struct in6_addr *)addr); h ^= h >> 16; h ^= h >> 8; h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1; @@ -89,12 +89,11 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos; - hlist_for_each_entry_rcu(x6spi, pos, + hlist_for_each_entry_rcu(x6spi, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) + if (xfrm6_addr_equal(&x6spi->addr, saddr)) return x6spi; } @@ -120,9 +119,8 @@ static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; int index = xfrm6_tunnel_spi_hash_byspi(spi); 
- struct hlist_node *pos; - hlist_for_each_entry(x6spi, pos, + hlist_for_each_entry(x6spi, &xfrm6_tn->spi_byspi[index], list_byspi) { if (x6spi->spi == spi) @@ -203,15 +201,15 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos, *n; + struct hlist_node *n; spin_lock_bh(&xfrm6_tunnel_spi_lock); - hlist_for_each_entry_safe(x6spi, pos, n, + hlist_for_each_entry_safe(x6spi, n, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { + if (xfrm6_addr_equal(&x6spi->addr, saddr)) { if (atomic_dec_and_test(&x6spi->refcnt)) { hlist_del_rcu(&x6spi->list_byaddr); hlist_del_rcu(&x6spi->list_byspi); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index dfd6faa..f547a47 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -228,9 +228,8 @@ static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc, __be16 port) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &intrfc->if_sklist) + sk_for_each(s, &intrfc->if_sklist) if (ipx_sk(s)->port == port) goto found; s = NULL; @@ -259,12 +258,11 @@ static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc, __be16 port) { struct sock *s; - struct hlist_node *node; ipxitf_hold(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(s, node, &intrfc->if_sklist) { + sk_for_each(s, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); if (ipxs->port == port && @@ -282,14 +280,14 @@ found: static void __ipxitf_down(struct ipx_interface *intrfc) { struct sock *s; - struct hlist_node *node, *t; + struct hlist_node *t; /* Delete all routes associated with this interface */ ipxrtr_del_routes(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); /* error sockets */ - sk_for_each_safe(s, node, t, &intrfc->if_sklist) { + sk_for_each_safe(s, t, &intrfc->if_sklist) { struct 
ipx_sock *ipxs = ipx_sk(s); s->sk_err = ENOLINK; @@ -385,12 +383,11 @@ static int ipxitf_demux_socket(struct ipx_interface *intrfc, int is_broadcast = !memcmp(ipx->ipx_dest.node, ipx_broadcast_node, IPX_NODE_LEN); struct sock *s; - struct hlist_node *node; int rc; spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(s, node, &intrfc->if_sklist) { + sk_for_each(s, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); if (ipxs->port == ipx->ipx_dest.sock && @@ -446,12 +443,11 @@ static struct sock *ncp_connection_hack(struct ipx_interface *intrfc, connection = (((int) *(ncphdr + 9)) << 8) | (int) *(ncphdr + 8); if (connection) { - struct hlist_node *node; /* Now we have to look for a special NCP connection handling * socket. Only these sockets have ipx_ncp_conn != 0, set by * SIOCIPXNCPCONN. */ spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(sk, node, &intrfc->if_sklist) + sk_for_each(sk, &intrfc->if_sklist) if (ipx_sk(sk)->ipx_ncp_conn == connection) { sock_hold(sk); goto found; diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 02ff7f2..65e8833 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -103,19 +103,18 @@ out: static __inline__ struct sock *ipx_get_socket_idx(loff_t pos) { struct sock *s = NULL; - struct hlist_node *node; struct ipx_interface *i; list_for_each_entry(i, &ipx_interfaces, node) { spin_lock_bh(&i->if_sklist_lock); - sk_for_each(s, node, &i->if_sklist) { + sk_for_each(s, &i->if_sklist) { if (!pos) break; --pos; } spin_unlock_bh(&i->if_sklist_lock); if (!pos) { - if (node) + if (s) goto found; break; } diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index b833677..e493b33 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1386,6 +1386,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -2567,8 +2569,7 @@ bed: err); /* If watchdog is 
still activated, kill it! */ - if(timer_pending(&(self->watchdog))) - del_timer(&(self->watchdog)); + del_timer(&(self->watchdog)); IRDA_DEBUG(1, "%s(), ...waking up !\n", __func__); @@ -2584,8 +2585,10 @@ bed: NULL, NULL, NULL); /* Check if the we got some results */ - if (!self->cachedaddr) - return -EAGAIN; /* Didn't find any devices */ + if (!self->cachedaddr) { + err = -EAGAIN; /* Didn't find any devices */ + goto out; + } daddr = self->cachedaddr; /* Cleanup */ self->cachedaddr = 0; diff --git a/net/irda/ircomm/Kconfig b/net/irda/ircomm/Kconfig index 2d4c6b4..19492c1 100644 --- a/net/irda/ircomm/Kconfig +++ b/net/irda/ircomm/Kconfig @@ -1,6 +1,6 @@ config IRCOMM tristate "IrCOMM protocol" - depends on IRDA + depends on IRDA && TTY help Say Y here if you want to build support for the IrCOMM protocol. To compile it as modules, choose M here: the modules will be diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index a68c88c..362ba47 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -280,7 +280,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, struct tty_port *port = &self->port; DECLARE_WAITQUEUE(wait, current); int retval; - int do_clocal = 0, extra_count = 0; + int do_clocal = 0; unsigned long flags; IRDA_DEBUG(2, "%s()\n", __func__ ); @@ -289,8 +289,15 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, * If non-blocking mode is set, or the port is not enabled, * then make the check up front and then exit. 
*/ - if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ - /* nonblock mode is set or port is not enabled */ + if (test_bit(TTY_IO_ERROR, &tty->flags)) { + port->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + + if (filp->f_flags & O_NONBLOCK) { + /* nonblock mode is set */ + if (tty->termios.c_cflag & CBAUD) + tty_port_raise_dtr_rts(port); port->flags |= ASYNC_NORMAL_ACTIVE; IRDA_DEBUG(1, "%s(), O_NONBLOCK requested!\n", __func__ ); return 0; @@ -315,18 +322,16 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, __FILE__, __LINE__, tty->driver->name, port->count); spin_lock_irqsave(&port->lock, flags); - if (!tty_hung_up_p(filp)) { - extra_count = 1; + if (!tty_hung_up_p(filp)) port->count--; - } - spin_unlock_irqrestore(&port->lock, flags); port->blocked_open++; + spin_unlock_irqrestore(&port->lock, flags); while (1) { if (tty->termios.c_cflag & CBAUD) tty_port_raise_dtr_rts(port); - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); if (tty_hung_up_p(filp) || !test_bit(ASYNCB_INITIALIZED, &port->flags)) { @@ -361,13 +366,11 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, __set_current_state(TASK_RUNNING); remove_wait_queue(&port->open_wait, &wait); - if (extra_count) { - /* ++ is not atomic, so this should be protected - Jean II */ - spin_lock_irqsave(&port->lock, flags); + spin_lock_irqsave(&port->lock, flags); + if (!tty_hung_up_p(filp)) port->count++; - spin_unlock_irqrestore(&port->lock, flags); - } port->blocked_open--; + spin_unlock_irqrestore(&port->lock, flags); IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n", __FILE__, __LINE__, tty->driver->name, port->count); @@ -452,7 +455,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) self->line, self->port.count); /* Not really used by us, but lets do it anyway */ - tty->low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 
1 : 0; + self->port.low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0; /* * If the port is the middle of closing, bail out now @@ -1136,14 +1139,14 @@ static int ircomm_tty_data_indication(void *instance, void *sap, ircomm_tty_send_initial_parameters(self); ircomm_tty_link_established(self); } + tty_kref_put(tty); /* * Use flip buffer functions since the code may be called from interrupt * context */ - tty_insert_flip_string(tty, skb->data, skb->len); - tty_flip_buffer_push(tty); - tty_kref_put(tty); + tty_insert_flip_string(&self->port, skb->data, skb->len); + tty_flip_buffer_push(&self->port); /* No need to kfree_skb - see ircomm_ttp_data_indication() */ diff --git a/net/irda/iriap.c b/net/irda/iriap.c index e71e85b..e1b37f5 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -303,7 +303,8 @@ static void iriap_disconnect_indication(void *instance, void *sap, { struct iriap_cb *self; - IRDA_DEBUG(4, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(4, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); self = instance; @@ -495,8 +496,11 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, /* case CS_ISO_8859_9: */ /* case CS_UNICODE: */ default: - IRDA_DEBUG(0, "%s(), charset %s, not supported\n", - __func__, ias_charset_types[charset]); + IRDA_DEBUG(0, "%s(), charset [%d] %s, not supported\n", + __func__, charset, + charset < ARRAY_SIZE(ias_charset_types) ? + ias_charset_types[charset] : + "(unknown)"); /* Aborting, close connection! 
*/ iriap_disconnect_request(self); diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 6115a44..1064621 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -66,8 +66,15 @@ const char *irlmp_reasons[] = { "LM_LAP_RESET", "LM_INIT_DISCONNECT", "ERROR, NOT USED", + "UNKNOWN", }; +const char *irlmp_reason_str(LM_REASON reason) +{ + reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1); + return irlmp_reasons[reason]; +} + /* * Function irlmp_init (void) * @@ -747,7 +754,8 @@ void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, { struct lsap_cb *lsap; - IRDA_DEBUG(1, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(1, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 2bb2beb..3c83a1e 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -214,8 +214,7 @@ irnet_get_discovery_log(irnet_socket * ap) * After reading : discoveries = NULL ; disco_index = Y ; disco_number = -1 */ static inline int -irnet_read_discovery_log(irnet_socket * ap, - char * event) +irnet_read_discovery_log(irnet_socket *ap, char *event, int buf_size) { int done_event = 0; @@ -237,12 +236,13 @@ irnet_read_discovery_log(irnet_socket * ap, if(ap->disco_index < ap->disco_number) { /* Write an event */ - sprintf(event, "Found %08x (%s) behind %08x {hints %02X-%02X}\n", - ap->discoveries[ap->disco_index].daddr, - ap->discoveries[ap->disco_index].info, - ap->discoveries[ap->disco_index].saddr, - ap->discoveries[ap->disco_index].hints[0], - ap->discoveries[ap->disco_index].hints[1]); + snprintf(event, buf_size, + "Found %08x (%s) behind %08x {hints %02X-%02X}\n", + ap->discoveries[ap->disco_index].daddr, + ap->discoveries[ap->disco_index].info, + ap->discoveries[ap->disco_index].saddr, + ap->discoveries[ap->disco_index].hints[0], + 
ap->discoveries[ap->disco_index].hints[1]); DEBUG(CTRL_INFO, "Writing discovery %d : %s\n", ap->disco_index, ap->discoveries[ap->disco_index].info); @@ -282,27 +282,24 @@ irnet_ctrl_read(irnet_socket * ap, size_t count) { DECLARE_WAITQUEUE(wait, current); - char event[64]; /* Max event is 61 char */ + char event[75]; ssize_t ret = 0; DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count); - /* Check if we can write an event out in one go */ - DABORT(count < sizeof(event), -EOVERFLOW, CTRL_ERROR, "Buffer to small.\n"); - #ifdef INITIAL_DISCOVERY /* Check if we have read the log */ - if(irnet_read_discovery_log(ap, event)) + if (irnet_read_discovery_log(ap, event, sizeof(event))) { - /* We have an event !!! Copy it to the user */ - if(copy_to_user(buf, event, strlen(event))) + count = min(strlen(event), count); + if (copy_to_user(buf, event, count)) { DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); return -EFAULT; } DEXIT(CTRL_TRACE, "\n"); - return strlen(event); + return count; } #endif /* INITIAL_DISCOVERY */ @@ -339,79 +336,81 @@ irnet_ctrl_read(irnet_socket * ap, switch(irnet_events.log[ap->event_index].event) { case IRNET_DISCOVER: - sprintf(event, "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); + snprintf(event, sizeof(event), + "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr, + irnet_events.log[ap->event_index].hints.byte[0], + irnet_events.log[ap->event_index].hints.byte[1]); break; case IRNET_EXPIRE: - sprintf(event, "Expired %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - 
irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); + snprintf(event, sizeof(event), + "Expired %08x (%s) behind %08x {hints %02X-%02X}\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr, + irnet_events.log[ap->event_index].hints.byte[0], + irnet_events.log[ap->event_index].hints.byte[1]); break; case IRNET_CONNECT_TO: - sprintf(event, "Connected to %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Connected to %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_CONNECT_FROM: - sprintf(event, "Connection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Connection from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_REQUEST_FROM: - sprintf(event, "Request from %08x (%s) behind %08x\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr); + snprintf(event, sizeof(event), "Request from %08x (%s) behind %08x\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr); break; case IRNET_NOANSWER_FROM: - sprintf(event, "No-answer from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "No-answer from %08x (%s) on ppp%d\n", 
+ irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_BLOCKED_LINK: - sprintf(event, "Blocked link with %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Blocked link with %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_DISCONNECT_FROM: - sprintf(event, "Disconnection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Disconnection from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_DISCONNECT_TO: - sprintf(event, "Disconnected to %08x (%s)\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name); + snprintf(event, sizeof(event), "Disconnected to %08x (%s)\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name); break; default: - sprintf(event, "Bug\n"); + snprintf(event, sizeof(event), "Bug\n"); } /* Increment our event index */ ap->event_index = (ap->event_index + 1) % IRNET_MAX_EVENTS; DEBUG(CTRL_INFO, "Event is :%s", event); - /* Copy it to the user */ - if(copy_to_user(buf, event, strlen(event))) + count = min(strlen(event), count); + if (copy_to_user(buf, event, count)) { DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); return -EFAULT; } DEXIT(CTRL_TRACE, "\n"); - return strlen(event); + return count; } /*------------------------------------------------------------------*/ diff --git a/net/irda/timer.c b/net/irda/timer.c index 1d552b3..0c4c115 100644 --- a/net/irda/timer.c +++ 
b/net/irda/timer.c @@ -57,7 +57,7 @@ void irlap_start_query_timer(struct irlap_cb *self, int S, int s) * Basically, we multiply the number of remaining slots by our * slot time, plus add some extra time to properly receive the last * discovery packet (which is longer due to extra discovery info), - * to avoid messing with for incomming connections requests and + * to avoid messing with for incoming connections requests and * to accommodate devices that perform discovery slower than us. * Jean II */ timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index cd6f7a9..206ce6d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -49,12 +49,6 @@ static const u8 iprm_shutdown[8] = #define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class)) -/* macros to set/get socket control buffer at correct offset */ -#define CB_TAG(skb) ((skb)->cb) /* iucv message tag */ -#define CB_TAG_LEN (sizeof(((struct iucv_message *) 0)->tag)) -#define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ -#define CB_TRGCLS_LEN (TRGCLS_SIZE) - #define __iucv_sock_wait(sk, condition, timeo, ret) \ do { \ DEFINE_WAIT(__wait); \ @@ -156,14 +150,13 @@ static int afiucv_pm_freeze(struct device *dev) { struct iucv_sock *iucv; struct sock *sk; - struct hlist_node *node; int err = 0; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_freeze\n"); #endif read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { iucv = iucv_sk(sk); switch (sk->sk_state) { case IUCV_DISCONN: @@ -194,13 +187,12 @@ static int afiucv_pm_freeze(struct device *dev) static int afiucv_pm_restore_thaw(struct device *dev) { struct sock *sk; - struct hlist_node *node; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_restore_thaw\n"); #endif read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { switch (sk->sk_state) { case 
IUCV_CONNECTED: sk->sk_err = EPIPE; @@ -390,9 +382,8 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, static struct sock *__iucv_get_sock_by_name(char *nm) { struct sock *sk; - struct hlist_node *node; - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (!memcmp(&iucv_sk(sk)->src_name, nm, 8)) return sk; @@ -1144,7 +1135,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + IUCV_SKB_CB(skb)->tag = txmsg.tag; if (iucv->transport == AF_IUCV_TRANS_HIPER) { atomic_inc(&iucv->msg_sent); @@ -1227,7 +1218,7 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) return -ENOMEM; /* copy target class to control buffer of new skb */ - memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN); + IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class; /* copy data fragment */ memcpy(nskb->data, skb->data + copied, size); @@ -1259,7 +1250,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, /* store msg target class in the second 4 bytes of skb ctrl buffer */ /* Note: the first 4 bytes are reserved for msg tag */ - memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = msg->class; /* check for special IPRM messages (e.g. 
iucv_sock_shutdown) */ if ((msg->flags & IUCV_IPRMDATA) && len > 7) { @@ -1295,6 +1286,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } } + IUCV_SKB_CB(skb)->offset = 0; if (sock_queue_rcv_skb(sk, skb)) skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); } @@ -1330,6 +1322,9 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned int copied, rlen; struct sk_buff *skb, *rskb, *cskb; int err = 0; + u32 offset; + + msg->msg_namelen = 0; if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && @@ -1349,13 +1344,14 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - rlen = skb->len; /* real length of skb */ + offset = IUCV_SKB_CB(skb)->offset; + rlen = skb->len - offset; /* real length of skb */ copied = min_t(unsigned int, rlen, len); if (!rlen) sk->sk_shutdown = sk->sk_shutdown | RCV_SHUTDOWN; cskb = skb; - if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { + if (skb_copy_datagram_iovec(cskb, offset, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; @@ -1373,7 +1369,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, * get the trgcls from the control buffer of the skb due to * fragmentation of original iucv message. 
*/ err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, - CB_TRGCLS_LEN, CB_TRGCLS(skb)); + sizeof(IUCV_SKB_CB(skb)->class), + (void *)&IUCV_SKB_CB(skb)->class); if (err) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); @@ -1385,9 +1382,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, /* SOCK_STREAM: re-queue skb if it contains unreceived data */ if (sk->sk_type == SOCK_STREAM) { - skb_pull(skb, copied); - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); + if (copied < rlen) { + IUCV_SKB_CB(skb)->offset = offset + copied; goto done; } } @@ -1406,6 +1402,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, spin_lock_bh(&iucv->message_q.lock); rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { + IUCV_SKB_CB(rskb)->offset = 0; if (sock_queue_rcv_skb(sk, rskb)) { skb_queue_head(&iucv->backlog_skb_q, rskb); @@ -1678,7 +1675,6 @@ static int iucv_callback_connreq(struct iucv_path *path, unsigned char user_data[16]; unsigned char nuser_data[16]; unsigned char src_name[8]; - struct hlist_node *node; struct sock *sk, *nsk; struct iucv_sock *iucv, *niucv; int err; @@ -1689,7 +1685,7 @@ static int iucv_callback_connreq(struct iucv_path *path, read_lock(&iucv_sk_list.lock); iucv = NULL; sk = NULL; - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (sk->sk_state == IUCV_LISTEN && !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) { /* @@ -1834,7 +1830,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) { + if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } @@ -2095,6 +2091,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); skb_reset_transport_header(skb); skb_reset_network_header(skb); + IUCV_SKB_CB(skb)->offset = 0; 
spin_lock(&iucv->message_q.lock); if (skb_queue_empty(&iucv->backlog_skb_q)) { if (sock_queue_rcv_skb(sk, skb)) { @@ -2115,7 +2112,6 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct hlist_node *node; struct sock *sk; struct iucv_sock *iucv; struct af_iucv_trans_hdr *trans_hdr; @@ -2132,7 +2128,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, iucv = NULL; sk = NULL; read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { if (trans_hdr->flags == AF_IUCV_FLAG_SYN) { if ((!memcmp(&iucv_sk(sk)->src_name, trans_hdr->destAppName, 8)) && @@ -2200,8 +2196,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, /* fall through and receive zero length data */ case 0: /* plain data frame */ - memcpy(CB_TRGCLS(skb), &trans_hdr->iucv_hdr.class, - CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class; err = afiucv_hs_callback_rx(sk, skb); break; default: @@ -2225,10 +2220,9 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb, struct sk_buff *list_skb; struct sk_buff *nskb; unsigned long flags; - struct hlist_node *node; read_lock_irqsave(&iucv_sk_list.lock, flags); - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (sk == isk) { iucv = iucv_sk(sk); break; @@ -2299,14 +2293,13 @@ static int afiucv_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = (struct net_device *)ptr; - struct hlist_node *node; struct sock *sk; struct iucv_sock *iucv; switch (event) { case NETDEV_REBOOT: case NETDEV_GOING_DOWN: - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { iucv = iucv_sk(sk); if ((iucv->hs_dev == event_dev) && (sk->sk_state == IUCV_CONNECTED)) { diff --git a/net/iucv/iucv.c 
b/net/iucv/iucv.c index 3ad1f9d..4fe76ff 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -831,8 +831,11 @@ static int iucv_reboot_event(struct notifier_block *this, { int i; + if (cpumask_empty(&iucv_irq_cpumask)) + return NOTIFY_DONE; + get_online_cpus(); - on_each_cpu(iucv_block_cpu, NULL, 1); + on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1); preempt_disable(); for (i = 0; i < iucv_max_pathid; i++) { if (iucv_path_table[i]) @@ -1806,7 +1809,7 @@ static void iucv_external_interrupt(struct ext_code ext_code, struct iucv_irq_data *p; struct iucv_irq_list *work; - kstat_cpu(smp_processor_id()).irqs[EXTINT_IUC]++; + inc_irq_stat(IRQEXT_IUC); p = iucv_irq_data[smp_processor_id()]; if (p->ippathid >= iucv_max_pathid) { WARN_ON(p->ippathid >= iucv_max_pathid); diff --git a/net/key/af_key.c b/net/key/af_key.c index 5b426a6..5b1e5af 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -203,7 +203,6 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, } if (*skb2 != NULL) { if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { - skb_orphan(*skb2); skb_set_owner_r(*skb2, sk); skb_queue_tail(&sk->sk_receive_queue, *skb2); sk->sk_data_ready(sk, (*skb2)->len); @@ -226,7 +225,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; - struct hlist_node *node; struct sk_buff *skb2 = NULL; int err = -ESRCH; @@ -237,7 +235,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return -ENOMEM; rcu_read_lock(); - sk_for_each_rcu(sk, node, &net_pfkey->table) { + sk_for_each_rcu(sk, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; @@ -762,7 +760,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, } /* identity & sensitivity */ - if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, x->props.family)) + if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) size += 
sizeof(struct sadb_address) + sockaddr_size; if (add_keys) { @@ -816,18 +814,21 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, sa->sadb_sa_auth = 0; if (x->aalg) { struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0); - sa->sadb_sa_auth = a ? a->desc.sadb_alg_id : 0; + sa->sadb_sa_auth = (a && a->pfkey_supported) ? + a->desc.sadb_alg_id : 0; } sa->sadb_sa_encrypt = 0; BUG_ON(x->ealg && x->calg); if (x->ealg) { struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0); - sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0; + sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? + a->desc.sadb_alg_id : 0; } /* KAME compatible: sadb_sa_encrypt is overloaded with calg id */ if (x->calg) { struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0); - sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0; + sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? + a->desc.sadb_alg_id : 0; } sa->sadb_sa_flags = 0; @@ -909,8 +910,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, if (!addr->sadb_address_prefixlen) BUG(); - if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, - x->props.family)) { + if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, + x->props.family)) { addr = (struct sadb_address*) skb_put(skb, sizeof(struct sadb_address)+sockaddr_size); addr->sadb_address_len = @@ -1138,7 +1139,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); - if (!a) { + if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } @@ -1160,7 +1161,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (sa->sadb_sa_encrypt) { if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) { struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt); - if (!a) { + if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } @@ -1172,7 +1173,7 @@ static struct xfrm_state * 
pfkey_msg2xfrm_state(struct net *net, } else { int keysize = 0; struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt); - if (!a) { + if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } @@ -1321,7 +1322,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (hdr->sadb_msg_seq) { x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); - if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { + if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; } @@ -1578,13 +1579,13 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct sadb_msg *hdr; int len, auth_len, enc_len, i; - auth_len = xfrm_count_auth_supported(); + auth_len = xfrm_count_pfkey_auth_supported(); if (auth_len) { auth_len *= sizeof(struct sadb_alg); auth_len += sizeof(struct sadb_supported); } - enc_len = xfrm_count_enc_supported(); + enc_len = xfrm_count_pfkey_enc_supported(); if (enc_len) { enc_len *= sizeof(struct sadb_alg); enc_len += sizeof(struct sadb_supported); @@ -1615,6 +1616,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; + if (!aalg->pfkey_supported) + continue; if (aalg->available) *ap++ = aalg->desc; } @@ -1634,6 +1637,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; + if (!ealg->pfkey_supported) + continue; if (ealg->available) *ap++ = ealg->desc; } @@ -2196,7 +2201,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->priority = pol->sadb_x_policy_priority; - sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); if (!xp->family) { err = -EINVAL; @@ -2209,7 +2214,7 @@ static int pfkey_spdadd(struct sock 
*sk, struct sk_buff *skb, const struct sadb_ if (xp->selector.sport) xp->selector.sport_mask = htons(0xffff); - sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr); xp->selector.prefixlen_d = sa->sadb_address_prefixlen; @@ -2310,7 +2315,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa memset(&sel, 0, sizeof(sel)); - sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); @@ -2318,7 +2323,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa if (sel.sport) sel.sport_mask = htons(0xffff); - sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); @@ -2688,6 +2693,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; + hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; @@ -2825,6 +2831,8 @@ static int count_ah_combs(const struct xfrm_tmpl *t) const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; + if (!aalg->pfkey_supported) + continue; if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } @@ -2840,6 +2848,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg) break; + if (!ealg->pfkey_supported) + continue; + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; @@ -2848,6 +2859,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg) break; + if 
(!aalg->pfkey_supported) + continue; + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } @@ -2871,6 +2885,9 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) if (!aalg) break; + if (!aalg->pfkey_supported) + continue; + if (aalg_tmpl_set(t, aalg) && aalg->available) { struct sadb_comb *c; c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb)); @@ -2903,6 +2920,9 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) if (!ealg) break; + if (!ealg->pfkey_supported) + continue; + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; @@ -2911,6 +2931,8 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; + if (!aalg->pfkey_supported) + continue; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) continue; c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb)); @@ -3718,7 +3740,7 @@ static int __net_init pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; - e = proc_net_fops_create(net, "pfkey", 0, &pfkey_proc_ops); + e = proc_create("pfkey", 0, net->proc_net, &pfkey_proc_ops); if (e == NULL) return -ENOMEM; @@ -3727,7 +3749,7 @@ static int __net_init pfkey_init_proc(struct net *net) static void __net_exit pfkey_exit_proc(struct net *net) { - proc_net_remove(net, "pfkey"); + remove_proc_entry("pfkey", net->proc_net); } #else static inline int pfkey_init_proc(struct net *net) diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index 147a8fd..adb9843 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -46,8 +46,8 @@ config L2TP_DEBUGFS will be called l2tp_debugfs. 
config L2TP_V3 - bool "L2TPv3 support (EXPERIMENTAL)" - depends on EXPERIMENTAL && L2TP + bool "L2TPv3 support" + depends on L2TP help Layer Two Tunneling Protocol Version 3 diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1a9f372..8aecf5d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -101,6 +101,7 @@ struct l2tp_skb_cb { static atomic_t l2tp_tunnel_count; static atomic_t l2tp_session_count; +static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ static unsigned int l2tp_net_id; @@ -113,7 +114,6 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); static inline struct l2tp_net *l2tp_pernet(struct net *net) { @@ -122,7 +122,6 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net) return net_generic(net, l2tp_net_id); } - /* Tunnel reference counts. Incremented per session that is added to * the tunnel. */ @@ -168,6 +167,53 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) } +/* Lookup the tunnel socket, possibly involving the fs code if the socket is + * owned by userspace. A struct sock returned from this function must be + * released using l2tp_tunnel_sock_put once you're done with it. + */ +struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) +{ + int err = 0; + struct socket *sock = NULL; + struct sock *sk = NULL; + + if (!tunnel) + goto out; + + if (tunnel->fd >= 0) { + /* Socket is owned by userspace, who might be in the process + * of closing it. Look the socket up using the fd to ensure + * consistency. + */ + sock = sockfd_lookup(tunnel->fd, &err); + if (sock) + sk = sock->sk; + } else { + /* Socket is owned by kernelspace */ + sk = tunnel->sock; + sock_hold(sk); + } + +out: + return sk; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_lookup); + +/* Drop a reference to a tunnel socket obtained via. 
l2tp_tunnel_sock_put */ +void l2tp_tunnel_sock_put(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + if (tunnel->fd >= 0) { + /* Socket is owned by userspace */ + sockfd_put(sk->sk_socket); + } + sock_put(sk); + } + sock_put(sk); +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); + /* Lookup a session by id in the global session list */ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) @@ -176,10 +222,9 @@ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) struct hlist_head *session_list = l2tp_session_id_hash_2(pn, session_id); struct l2tp_session *session; - struct hlist_node *walk; rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) { + hlist_for_each_entry_rcu(session, session_list, global_hlist) { if (session->session_id == session_id) { rcu_read_unlock_bh(); return session; @@ -208,7 +253,6 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn { struct hlist_head *session_list; struct l2tp_session *session; - struct hlist_node *walk; /* In L2TPv3, session_ids are unique over all tunnels and we * sometimes need to look them up before we know the @@ -219,7 +263,7 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn session_list = l2tp_session_id_hash(tunnel, session_id); read_lock_bh(&tunnel->hlist_lock); - hlist_for_each_entry(session, walk, session_list, hlist) { + hlist_for_each_entry(session, session_list, hlist) { if (session->session_id == session_id) { read_unlock_bh(&tunnel->hlist_lock); return session; @@ -234,13 +278,12 @@ EXPORT_SYMBOL_GPL(l2tp_session_find); struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; - struct hlist_node *walk; struct l2tp_session *session; int count = 0; read_lock_bh(&tunnel->hlist_lock); for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { - hlist_for_each_entry(session, walk, 
&tunnel->session_hlist[hash], hlist) { + hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { read_unlock_bh(&tunnel->hlist_lock); return session; @@ -261,12 +304,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); int hash; - struct hlist_node *walk; struct l2tp_session *session; rcu_read_lock_bh(); for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { - hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) { + hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { rcu_read_unlock_bh(); return session; @@ -332,10 +374,8 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk struct sk_buff *skbp; struct sk_buff *tmp; u32 ns = L2TP_SKB_CB(skb)->ns; - struct l2tp_stats *sstats; spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (L2TP_SKB_CB(skbp)->ns > ns) { __skb_queue_before(&session->reorder_q, skbp, skb); @@ -343,9 +383,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", session->name, ns, L2TP_SKB_CB(skbp)->ns, skb_queue_len(&session->reorder_q)); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_oos_packets++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_oos_packets); goto out; } } @@ -362,23 +400,16 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * { struct l2tp_tunnel *tunnel = session->tunnel; int length = L2TP_SKB_CB(skb)->length; - struct l2tp_stats *tstats, *sstats; /* We're about to requeue the skb, so return resources * to its current owner (a socket receive buffer). 
*/ skb_orphan(skb); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += length; - sstats->rx_packets++; - sstats->rx_bytes += length; - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&tunnel->stats.rx_packets); + atomic_long_add(length, &tunnel->stats.rx_bytes); + atomic_long_inc(&session->stats.rx_packets); + atomic_long_add(length, &session->stats.rx_bytes); if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ @@ -409,7 +440,6 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) { struct sk_buff *skb; struct sk_buff *tmp; - struct l2tp_stats *sstats; /* If the pkt at the head of the queue has the nr that we * expect to send up next, dequeue it and any other @@ -417,13 +447,10 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) */ start: spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skb, tmp) { if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); + atomic_long_inc(&session->stats.rx_errors); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -582,7 +609,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; int offset; u32 ns, nr; - struct l2tp_stats *sstats = &session->stats; /* The ref count is increased since we now hold a pointer to * the session. Take care to decrement the refcnt when exiting @@ -599,9 +625,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: cookie mismatch (%u/%u). 
Discarding.\n", tunnel->name, tunnel->tunnel_id, session->session_id); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_cookie_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } ptr += session->peer_cookie_len; @@ -670,9 +694,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -691,9 +713,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } } @@ -747,9 +767,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * packets */ if (L2TP_SKB_CB(skb)->ns != session->nr) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -775,9 +793,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, return; discard: - u64_stats_update_begin(&sstats->syncp); - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); if (session->deref) @@ -787,6 +803,23 @@ discard: } EXPORT_SYMBOL(l2tp_recv_common); +/* Drop skbs from the session's reorder_q + */ +int l2tp_session_queue_purge(struct l2tp_session *session) +{ + struct 
sk_buff *skb = NULL; + BUG_ON(!session); + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + while ((skb = skb_dequeue(&session->reorder_q))) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + if (session->deref) + (*session->deref)(session); + } + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_session_queue_purge); + /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame * here. The skb is not on a list when we get here. * Returns 0 if the packet was a data packet and was successfully passed on. @@ -802,7 +835,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, u32 tunnel_id, session_id; u16 version; int length; - struct l2tp_stats *tstats; if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) goto discard_bad_csum; @@ -891,10 +923,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, discard_bad_csum: LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - tstats->rx_errors++; - u64_stats_update_end(&tstats->syncp); + atomic_long_inc(&tunnel->stats.rx_errors); kfree_skb(skb); return 0; @@ -1021,7 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; unsigned int len = skb->len; int error; - struct l2tp_stats *tstats, *sstats; /* Debug */ if (session->send_seq) @@ -1050,21 +1078,15 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, error = ip_queue_xmit(skb, fl); /* Update stats */ - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); if (error >= 0) { - tstats->tx_packets++; - tstats->tx_bytes += len; - sstats->tx_packets++; - sstats->tx_bytes += len; + atomic_long_inc(&tunnel->stats.tx_packets); + atomic_long_add(len, 
&tunnel->stats.tx_bytes); + atomic_long_inc(&session->stats.tx_packets); + atomic_long_add(len, &session->stats.tx_bytes); } else { - tstats->tx_errors++; - sstats->tx_errors++; + atomic_long_inc(&tunnel->stats.tx_errors); + atomic_long_inc(&session->stats.tx_errors); } - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); return 0; } @@ -1123,8 +1145,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len struct udphdr *uh; struct inet_sock *inet; __wsum csum; - int old_headroom; - int new_headroom; int headroom; int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; int udp_len; @@ -1136,16 +1156,12 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len */ headroom = NET_SKB_PAD + sizeof(struct iphdr) + uhlen + hdr_len; - old_headroom = skb_headroom(skb); if (skb_cow_head(skb, headroom)) { kfree_skb(skb); return NET_XMIT_DROP; } - new_headroom = skb_headroom(skb); skb_orphan(skb); - skb->truesize += new_headroom - old_headroom; - /* Setup L2TP header */ session->build_header(session, __skb_push(skb, hdr_len)); @@ -1232,6 +1248,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); static void l2tp_tunnel_destruct(struct sock *sk) { struct l2tp_tunnel *tunnel; + struct l2tp_net *pn; tunnel = sk->sk_user_data; if (tunnel == NULL) @@ -1239,38 +1256,44 @@ static void l2tp_tunnel_destruct(struct sock *sk) l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name); - /* Close all sessions */ - l2tp_tunnel_closeall(tunnel); + /* Disable udp encapsulation */ switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* No longer an encapsulation socket. 
See net/ipv4/udp.c */ (udp_sk(sk))->encap_type = 0; (udp_sk(sk))->encap_rcv = NULL; + (udp_sk(sk))->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; } /* Remove hooks into tunnel socket */ - tunnel->sock = NULL; sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + tunnel->sock = NULL; - /* Call the original destructor */ - if (sk->sk_destruct) - (*sk->sk_destruct)(sk); + /* Remove the tunnel struct from the tunnel list */ + pn = l2tp_pernet(tunnel->l2tp_net); + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_rcu(&tunnel->list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + atomic_dec(&l2tp_tunnel_count); - /* We're finished with the socket */ + l2tp_tunnel_closeall(tunnel); l2tp_tunnel_dec_refcount(tunnel); + /* Call the original destructor */ + if (sk->sk_destruct) + (*sk->sk_destruct)(sk); end: return; } /* When the tunnel is closed, all the attached sessions need to go too. */ -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { int hash; struct hlist_node *walk; @@ -1293,25 +1316,13 @@ again: hlist_del_init(&session->hlist); - /* Since we should hold the sock lock while - * doing any unbinding, we need to release the - * lock we're holding before taking that lock. - * Hold a reference to the sock so it doesn't - * disappear as we're jumping between locks. 
- */ if (session->ref != NULL) (*session->ref)(session); write_unlock_bh(&tunnel->hlist_lock); - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); - } + __l2tp_session_unhash(session); + l2tp_session_queue_purge(session); if (session->session_close != NULL) (*session->session_close)(session); @@ -1319,6 +1330,8 @@ again: if (session->deref != NULL) (*session->deref)(session); + l2tp_session_dec_refcount(session); + write_lock_bh(&tunnel->hlist_lock); /* Now restart from the beginning of this hash @@ -1331,54 +1344,96 @@ again: } write_unlock_bh(&tunnel->hlist_lock); } +EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); + +/* Tunnel socket destroy hook for UDP encapsulation */ +static void l2tp_udp_encap_destroy(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } +} /* Really kill the tunnel. * Come here only when all sessions have been cleared from the tunnel. 
*/ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - BUG_ON(atomic_read(&tunnel->ref_count) != 0); BUG_ON(tunnel->sock != NULL); - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name); - - /* Remove from tunnel list */ - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_del_rcu(&tunnel->list); kfree_rcu(tunnel, rcu); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); +} - atomic_dec(&l2tp_tunnel_count); +/* Workqueue tunnel deletion function */ +static void l2tp_tunnel_del_work(struct work_struct *work) +{ + struct l2tp_tunnel *tunnel = NULL; + struct socket *sock = NULL; + struct sock *sk = NULL; + + tunnel = container_of(work, struct l2tp_tunnel, del_work); + sk = l2tp_tunnel_sock_lookup(tunnel); + if (!sk) + return; + + sock = sk->sk_socket; + + /* If the tunnel socket was created by userspace, then go through the + * inet layer to shut the socket down, and let userspace close it. + * Otherwise, if we created the socket directly within the kernel, use + * the sk API to release it here. + * In either case the tunnel resources are freed in the socket + * destructor when the tunnel socket goes away. + */ + if (tunnel->fd >= 0) { + if (sock) + inet_shutdown(sock, 2); + } else { + if (sock) + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sk); + } + + l2tp_tunnel_sock_put(sk); } /* Create a socket for the tunnel, if one isn't set up by * userspace. This is used for static tunnels where there is no * managing L2TP daemon. + * + * Since we don't want these sockets to keep a namespace alive by + * themselves, we drop the socket's namespace refcount after creation. + * These sockets are freed when the namespace exits using the pernet + * exit hook. 
*/ -static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp) +static int l2tp_tunnel_sock_create(struct net *net, + u32 tunnel_id, + u32 peer_tunnel_id, + struct l2tp_tunnel_cfg *cfg, + struct socket **sockp) { int err = -EINVAL; - struct sockaddr_in udp_addr; + struct socket *sock = NULL; + struct sockaddr_in udp_addr = {0}; + struct sockaddr_l2tpip ip_addr = {0}; #if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 udp6_addr; - struct sockaddr_l2tpip6 ip6_addr; + struct sockaddr_in6 udp6_addr = {0}; + struct sockaddr_l2tpip6 ip6_addr = {0}; #endif - struct sockaddr_l2tpip ip_addr; - struct socket *sock = NULL; switch (cfg->encap) { case L2TP_ENCAPTYPE_UDP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create(AF_INET6, SOCK_DGRAM, 0, sockp); + err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&udp6_addr, 0, sizeof(udp6_addr)); udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); @@ -1400,13 +1455,12 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } else #endif { - err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp); + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&udp_addr, 0, sizeof(udp_addr)); udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->local_ip; udp_addr.sin_port = htons(cfg->local_udp_port); @@ -1433,14 +1487,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, - sockp); + err = sock_create_kern(AF_INET6, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - 
memset(&ip6_addr, 0, sizeof(ip6_addr)); ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); @@ -1462,14 +1515,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } else #endif { - err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, - sockp); + err = sock_create_kern(AF_INET, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&ip_addr, 0, sizeof(ip_addr)); ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; @@ -1493,8 +1545,10 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } out: + *sockp = sock; if ((err < 0) && sock) { - sock_release(sock); + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); *sockp = NULL; } @@ -1517,15 +1571,23 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 * kernel socket. */ if (fd < 0) { - err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock); + err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id, + cfg, &sock); if (err < 0) goto err; } else { - err = -EBADF; sock = sockfd_lookup(fd, &err); if (!sock) { - pr_err("tunl %hu: sockfd_lookup(fd=%d) returned %d\n", + pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n", tunnel_id, fd, err); + err = -EBADF; + goto err; + } + + /* Reject namespace mismatches */ + if (!net_eq(sock_net(sock->sk), net)) { + pr_err("tunl %u: netns mismatch\n", tunnel_id); + err = -EINVAL; goto err; } } @@ -1591,6 +1653,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. 
See net/ipv4/udp.c */ udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) udpv6_encap_enable(); @@ -1607,10 +1670,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->old_sk_destruct = sk->sk_destruct; sk->sk_destruct = &l2tp_tunnel_destruct; tunnel->sock = sk; + tunnel->fd = fd; lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); sk->sk_allocation = GFP_ATOMIC; + /* Init delete workqueue struct */ + INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work); + /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); atomic_inc(&l2tp_tunnel_count); @@ -1642,25 +1709,8 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { - int err = 0; - struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL; - - /* Force the tunnel socket to close. This will eventually - * cause the tunnel to be deleted via the normal socket close - * mechanisms when userspace closes the tunnel socket. - */ - if (sock != NULL) { - err = inet_shutdown(sock, 2); - - /* If the tunnel's socket was created by the kernel, - * close the socket here since the socket was not - * created by userspace. 
- */ - if (sock->file == NULL) - err = inet_release(sock); - } - - return err; + l2tp_tunnel_closeall(tunnel); + return (false == queue_work(l2tp_wq, &tunnel->del_work)); } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); @@ -1668,62 +1718,71 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); */ void l2tp_session_free(struct l2tp_session *session) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = session->tunnel; BUG_ON(atomic_read(&session->ref_count) != 0); - tunnel = session->tunnel; - if (tunnel != NULL) { + if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + if (session->session_id != 0) + atomic_dec(&l2tp_session_count); + sock_put(tunnel->sock); + session->tunnel = NULL; + l2tp_tunnel_dec_refcount(tunnel); + } + + kfree(session); + + return; +} +EXPORT_SYMBOL_GPL(l2tp_session_free); + +/* Remove an l2tp session from l2tp_core's hash lists. + * Provides a tidyup interface for pseudowire code which can't just route all + * shutdown via. l2tp_session_delete and a pseudowire-specific session_close + * callback. + */ +void __l2tp_session_unhash(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel = session->tunnel; - /* Delete the session from the hash */ + /* Remove the session from core hashes */ + if (tunnel) { + /* Remove from the per-tunnel hash */ write_lock_bh(&tunnel->hlist_lock); hlist_del_init(&session->hlist); write_unlock_bh(&tunnel->hlist_lock); - /* Unlink from the global hash if not L2TPv2 */ + /* For L2TPv3 we have a per-net hash: remove from there, too */ if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_session_hlist_lock); hlist_del_init_rcu(&session->global_hlist); spin_unlock_bh(&pn->l2tp_session_hlist_lock); synchronize_rcu(); } - - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); - - sock_put(tunnel->sock); - - /* This will delete the tunnel context if this - * is the last session on the tunnel. 
- */ - session->tunnel = NULL; - l2tp_tunnel_dec_refcount(tunnel); } - - kfree(session); - - return; } -EXPORT_SYMBOL_GPL(l2tp_session_free); +EXPORT_SYMBOL_GPL(__l2tp_session_unhash); /* This function is used by the netlink SESSION_DELETE command and by pseudowire modules. */ int l2tp_session_delete(struct l2tp_session *session) { + if (session->ref) + (*session->ref)(session); + __l2tp_session_unhash(session); + l2tp_session_queue_purge(session); if (session->session_close != NULL) (*session->session_close)(session); - + if (session->deref) + (*session->ref)(session); l2tp_session_dec_refcount(session); - return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); - /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ @@ -1844,8 +1903,21 @@ static __net_init int l2tp_init_net(struct net *net) return 0; } +static __net_exit void l2tp_exit_net(struct net *net) +{ + struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel = NULL; + + rcu_read_lock_bh(); + list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { + (void)l2tp_tunnel_delete(tunnel); + } + rcu_read_unlock_bh(); +} + static struct pernet_operations l2tp_net_ops = { .init = l2tp_init_net, + .exit = l2tp_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), }; @@ -1858,6 +1930,13 @@ static int __init l2tp_init(void) if (rc) goto out; + l2tp_wq = alloc_workqueue("l2tp", WQ_NON_REENTRANT | WQ_UNBOUND, 0); + if (!l2tp_wq) { + pr_err("alloc_workqueue failed\n"); + rc = -ENOMEM; + goto out; + } + pr_info("L2TP core driver, %s\n", L2TP_DRV_VERSION); out: @@ -1867,6 +1946,10 @@ out: static void __exit l2tp_exit(void) { unregister_pernet_device(&l2tp_net_ops); + if (l2tp_wq) { + destroy_workqueue(l2tp_wq); + l2tp_wq = NULL; + } } module_init(l2tp_init); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 56d583e..485a490 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -36,16 +36,15 @@ enum { struct sk_buff; struct 
l2tp_stats { - u64 tx_packets; - u64 tx_bytes; - u64 tx_errors; - u64 rx_packets; - u64 rx_bytes; - u64 rx_seq_discards; - u64 rx_oos_packets; - u64 rx_errors; - u64 rx_cookie_discards; - struct u64_stats_sync syncp; + atomic_long_t tx_packets; + atomic_long_t tx_bytes; + atomic_long_t tx_errors; + atomic_long_t rx_packets; + atomic_long_t rx_bytes; + atomic_long_t rx_seq_discards; + atomic_long_t rx_oos_packets; + atomic_long_t rx_errors; + atomic_long_t rx_cookie_discards; }; struct l2tp_tunnel; @@ -188,7 +187,10 @@ struct l2tp_tunnel { int (*recv_payload_hook)(struct sk_buff *skb); void (*old_sk_destruct)(struct sock *); struct sock *sock; /* Parent socket */ - int fd; + int fd; /* Parent fd, if tunnel socket + * was created by userspace */ + + struct work_struct del_work; uint8_t priv[0]; /* private data */ }; @@ -228,6 +230,8 @@ out: return tunnel; } +extern struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); +extern void l2tp_tunnel_sock_put(struct sock *sk); extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); @@ -235,11 +239,14 @@ extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +extern void __l2tp_session_unhash(struct l2tp_session *session); extern int l2tp_session_delete(struct 
l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); +extern int l2tp_session_queue_purge(struct l2tp_session *session); extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index c3813bc..072d720 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -146,14 +146,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, atomic_read(&tunnel->ref_count)); - seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); if (tunnel->show != NULL) tunnel->show(m, tunnel); @@ -203,14 +203,14 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) seq_printf(m, "\n"); } - seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - 
(unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (session->show != NULL) session->show(m, session); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 61d8b75..571db8d 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -49,10 +49,9 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) { - struct hlist_node *node; struct sock *sk; - sk_for_each_bound(sk, node, &l2tp_ip_bind_table) { + sk_for_each_bound(sk, &l2tp_ip_bind_table) { struct inet_sock *inet = inet_sk(sk); struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk); @@ -115,6 +114,7 @@ static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, in */ static int l2tp_ip_recv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct sock *sk; u32 session_id; u32 tunnel_id; @@ -142,7 +142,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. 
*/ - session = l2tp_session_find(&init_net, NULL, session_id); + session = l2tp_session_find(net, NULL, session_id); if (session == NULL) goto discard; @@ -173,14 +173,14 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(&init_net, tunnel_id); + tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel != NULL) sk = tunnel->sock; else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id); read_unlock_bh(&l2tp_ip_lock); } @@ -228,10 +228,16 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { struct sk_buff *skb; + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + sk_refcnt_debug_dec(sk); } @@ -239,6 +245,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr; + struct net *net = sock_net(sk); int ret; int chk_addr_ret; @@ -251,7 +258,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) ret = -EADDRINUSE; read_lock_bh(&l2tp_ip_lock); - if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id)) + if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, + sk->sk_bound_dev_if, addr->l2tp_conn_id)) goto out_in_use; read_unlock_bh(&l2tp_ip_lock); @@ -260,7 +268,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; - chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr); + chk_addr_ret = inet_addr_type(net, 
addr->l2tp_addr.s_addr); ret = -EADDRNOTAVAIL; if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -369,7 +377,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) return 0; drop: - IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); return -1; } @@ -605,6 +613,7 @@ static struct inet_protosw l2tp_ip_protosw = { static struct net_protocol l2tp_ip_protocol __read_mostly = { .handler = l2tp_ip_recv, + .netns_ok = 1, }; static int __init l2tp_ip_init(void) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 9275471..b8a6039 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -60,10 +60,9 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct in6_addr *laddr, int dif, u32 tunnel_id) { - struct hlist_node *node; struct sock *sk; - sk_for_each_bound(sk, node, &l2tp_ip6_bind_table) { + sk_for_each_bound(sk, &l2tp_ip6_bind_table) { struct in6_addr *addr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); @@ -242,10 +241,17 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + inet6_destroy_sock(sk); } @@ -554,8 +560,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -646,7 +652,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr 
*msg, size_t len, int noblock, int flags, int *addr_len) { - struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)msg->msg_name; size_t copied = 0; int err = -EOPNOTSUPP; @@ -684,12 +690,13 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, lsa->l2tp_addr = ipv6_hdr(skb)->saddr; lsa->l2tp_flowinfo = 0; lsa->l2tp_scope_id = 0; + lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = IP6CB(skb)->iif; } - if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + if (np->rxopt.all) + ip6_datagram_recv_ctl(sk, msg, skb); if (flags & MSG_TRUNC) copied = skb->len; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bbba3a1..0825ff2 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -37,6 +37,7 @@ static struct genl_family l2tp_nl_family = { .version = L2TP_GENL_VERSION, .hdrsize = 0, .maxattr = L2TP_ATTR_MAX, + .netnsok = true, }; /* Accessed under genl lock */ @@ -245,8 +246,6 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np = NULL; #endif - struct l2tp_stats stats; - unsigned int start; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); @@ -264,28 +263,22 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&tunnel->stats.syncp); - stats.tx_packets = tunnel->stats.tx_packets; - stats.tx_bytes = tunnel->stats.tx_bytes; - stats.tx_errors = tunnel->stats.tx_errors; - stats.rx_packets = tunnel->stats.rx_packets; - stats.rx_bytes = tunnel->stats.rx_bytes; - stats.rx_errors = tunnel->stats.rx_errors; - stats.rx_seq_discards = tunnel->stats.rx_seq_discards; - stats.rx_oos_packets = tunnel->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&tunnel->stats.syncp, start)); - - if 
(nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&tunnel->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&tunnel->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&tunnel->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&tunnel->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&tunnel->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&tunnel->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&tunnel->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&tunnel->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); @@ -611,8 +604,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl struct nlattr *nest; struct l2tp_tunnel *tunnel = session->tunnel; struct sock *sk = NULL; - struct l2tp_stats stats; - unsigned int start; sk = tunnel->sock; @@ -655,28 +646,22 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&session->stats.syncp); - stats.tx_packets = session->stats.tx_packets; - stats.tx_bytes = session->stats.tx_bytes; - stats.tx_errors = session->stats.tx_errors; - stats.rx_packets = session->stats.rx_packets; - stats.rx_bytes = session->stats.rx_bytes; - stats.rx_errors = session->stats.rx_errors; - stats.rx_seq_discards = session->stats.rx_seq_discards; - 
stats.rx_oos_packets = session->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&session->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&session->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&session->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&session->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&session->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&session->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&session->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&session->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&session->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 286366e..637a341 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -97,6 +97,7 @@ #include <net/ip.h> #include <net/udp.h> #include <net/xfrm.h> +#include <net/inet_common.h> #include <asm/byteorder.h> #include <linux/atomic.h> @@ -259,7 +260,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int session->name); /* Not bound. Nothing we can do, so discard. 
*/ - session->stats.rx_errors++; + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } @@ -355,6 +356,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh l2tp_xmit_skb(session, skb, session->hdr_len); sock_put(ps->tunnel_sock); + sock_put(sk); return error; @@ -388,8 +390,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) struct l2tp_session *session; struct l2tp_tunnel *tunnel; struct pppol2tp_session *ps; - int old_headroom; - int new_headroom; int uhlen, headroom; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) @@ -408,7 +408,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (tunnel == NULL) goto abort_put_sess; - old_headroom = skb_headroom(skb); uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; headroom = NET_SKB_PAD + sizeof(struct iphdr) + /* IP header */ @@ -418,9 +417,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (skb_cow_head(skb, headroom)) goto abort_put_sess_tun; - new_headroom = skb_headroom(skb); - skb->truesize += new_headroom - old_headroom; - /* Setup PPP header */ __skb_push(skb, sizeof(ppph)); skb->data[0] = ppph[0]; @@ -452,34 +448,16 @@ static void pppol2tp_session_close(struct l2tp_session *session) { struct pppol2tp_session *ps = l2tp_session_priv(session); struct sock *sk = ps->sock; - struct sk_buff *skb; + struct socket *sock = sk->sk_socket; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (session->session_id == 0) - goto out; - - if (sk != NULL) { - lock_sock(sk); - - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - sk->sk_state = PPPOX_DEAD; - sk->sk_state_change(sk); - } - /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } - - release_sock(sk); + if (sock) { + 
inet_shutdown(sock, 2); + /* Don't let the session go away before our socket does */ + l2tp_session_inc_refcount(session); } - -out: return; } @@ -488,19 +466,12 @@ out: */ static void pppol2tp_session_destruct(struct sock *sk) { - struct l2tp_session *session; - - if (sk->sk_user_data != NULL) { - session = sk->sk_user_data; - if (session == NULL) - goto out; - + struct l2tp_session *session = sk->sk_user_data; + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); l2tp_session_dec_refcount(session); } - -out: return; } @@ -530,16 +501,13 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); if (session != NULL) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } + __l2tp_session_unhash(session); + l2tp_session_queue_purge(session); sock_put(sk); } + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); release_sock(sk); @@ -885,18 +853,6 @@ out: return error; } -/* Called when deleting sessions via the netlink interface. - */ -static int pppol2tp_session_delete(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock == NULL) - l2tp_session_dec_refcount(session); - - return 0; -} - #endif /* CONFIG_L2TP_V3 */ /* getname() support. 
@@ -1030,14 +986,14 @@ end: static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, struct l2tp_stats *stats) { - dest->tx_packets = stats->tx_packets; - dest->tx_bytes = stats->tx_bytes; - dest->tx_errors = stats->tx_errors; - dest->rx_packets = stats->rx_packets; - dest->rx_bytes = stats->rx_bytes; - dest->rx_seq_discards = stats->rx_seq_discards; - dest->rx_oos_packets = stats->rx_oos_packets; - dest->rx_errors = stats->rx_errors; + dest->tx_packets = atomic_long_read(&stats->tx_packets); + dest->tx_bytes = atomic_long_read(&stats->tx_bytes); + dest->tx_errors = atomic_long_read(&stats->tx_errors); + dest->rx_packets = atomic_long_read(&stats->rx_packets); + dest->rx_bytes = atomic_long_read(&stats->rx_bytes); + dest->rx_seq_discards = atomic_long_read(&stats->rx_seq_discards); + dest->rx_oos_packets = atomic_long_read(&stats->rx_oos_packets); + dest->rx_errors = atomic_long_read(&stats->rx_errors); } /* Session ioctl helper. @@ -1671,14 +1627,14 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) tunnel->name, (tunnel == tunnel->sock->sk_user_data) ? 
'Y' : 'N', atomic_read(&tunnel->ref_count) - 1); - seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); } static void pppol2tp_seq_session_show(struct seq_file *m, void *v) @@ -1713,14 +1669,14 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (po) seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); @@ -1789,7 +1745,8 @@ static __net_init int pppol2tp_init_net(struct net *net) struct proc_dir_entry *pde; int err = 0; - pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops); + pde = proc_create("pppol2tp", 
S_IRUGO, net->proc_net, + &pppol2tp_proc_fops); if (!pde) { err = -ENOMEM; goto out; @@ -1801,7 +1758,7 @@ out: static __net_exit void pppol2tp_exit_net(struct net *net) { - proc_net_remove(net, "pppol2tp"); + remove_proc_entry("pppol2tp", net->proc_net); } static struct pernet_operations pppol2tp_net_ops = { @@ -1843,7 +1800,7 @@ static const struct pppox_proto pppol2tp_proto = { static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = { .session_create = pppol2tp_session_create, - .session_delete = pppol2tp_session_delete, + .session_delete = l2tp_session_delete, }; #endif /* CONFIG_L2TP_V3 */ diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig index f0b5efb..6481839 100644 --- a/net/lapb/Kconfig +++ b/net/lapb/Kconfig @@ -3,8 +3,7 @@ # config LAPB - tristate "LAPB Data Link Driver (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "LAPB Data Link Driver" ---help--- Link Access Procedure, Balanced (LAPB) is the data link layer (i.e. the lower) part of the X.25 protocol. It offers a reliable diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8870988..48aaa89 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,6 +720,8 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; + msg->msg_namelen = 0; + lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 7c5073b..78be45c 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -393,12 +393,11 @@ static void llc_sap_mcast(struct llc_sap *sap, { int i = 0, count = 256 / sizeof(struct sock *); struct sock *sk, *stack[count]; - struct hlist_node *node; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); - hlist_for_each_entry(llc, node, dev_hb, dev_hash_node) { + hlist_for_each_entry(llc, dev_hb, dev_hash_node) { sk = &llc->sk; diff --git 
a/net/mac80211/Kconfig b/net/mac80211/Kconfig index b4ecf26..62535fe 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -81,7 +81,7 @@ comment "Some wireless drivers require a rate control algorithm" config MAC80211_MESH bool "Enable mac80211 mesh networking (pre-802.11s) support" - depends on MAC80211 && EXPERIMENTAL + depends on MAC80211 ---help--- This options enables support of Draft 802.11s mesh networking. The implementation is based on Draft 2.08 of the Mesh Networking @@ -258,6 +258,17 @@ config MAC80211_MESH_SYNC_DEBUG Do not select this option. +config MAC80211_MESH_PS_DEBUG + bool "Verbose mesh powersave debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very verbose mesh + powersave debugging messages (when mac80211 is taking part in a + mesh network). + + Do not select this option. + config MAC80211_TDLS_DEBUG bool "Verbose TDLS debugging" depends on MAC80211_DEBUG_MENU diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 4911202..9d7d840 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -39,7 +39,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \ mesh_pathtbl.o \ mesh_plink.o \ mesh_hwmp.o \ - mesh_sync.o + mesh_sync.o \ + mesh_ps.o mac80211-$(CONFIG_PM) += pm.o diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 808338a..31bf258 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -83,8 +83,8 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL, 0)) sdata_info(sta->sdata, - "HW problem - can not stop rx aggregation for tid %d\n", - tid); + "HW problem - can not stop rx aggregation for %pM tid %d\n", + sta->sta.addr, tid); /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) @@ -159,7 +159,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) } 
rcu_read_unlock(); - ht_dbg(sta->sdata, "rx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n", + sta->sta.addr, (u16)*ptid); set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); @@ -247,7 +248,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { - ht_dbg(sta->sdata, "Suspend in progress - Denying ADDBA request\n"); + ht_dbg(sta->sdata, + "Suspend in progress - Denying ADDBA request (%pM tid %d)\n", + sta->sta.addr, tid); goto end_no_lock; } @@ -317,7 +320,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num, 0); - ht_dbg(sta->sdata, "Rx A-MPDU request on tid %d result %d\n", tid, ret); + ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", + sta->sta.addr, tid, ret); if (ret) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index eb9df22..13b7683 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -149,16 +149,133 @@ void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); } +static inline int ieee80211_ac_from_tid(int tid) +{ + return ieee802_1d_to_ac[tid & 7]; +} + +/* + * When multiple aggregation sessions on multiple stations + * are being created/destroyed simultaneously, we need to + * refcount the global queue stop caused by that in order + * to not get into a situation where one of the aggregation + * setup or teardown re-enables queues before the other is + * ready to handle that. + * + * These two functions take care of this issue by keeping + * a global "agg_queue_stop" refcount. 
+ */ +static void __acquires(agg_queue) +ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) +{ + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + + if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) + ieee80211_stop_queue_by_reason( + &sdata->local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __acquire(agg_queue); +} + +static void __releases(agg_queue) +ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) +{ + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + + if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) + ieee80211_wake_queue_by_reason( + &sdata->local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __release(agg_queue); +} + +/* + * splice packets from the STA's pending to the local pending, + * requires a call to ieee80211_agg_splice_finish later + */ +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, + struct tid_ampdu_tx *tid_tx, u16 tid) +{ + struct ieee80211_local *local = sdata->local; + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + unsigned long flags; + + ieee80211_stop_queue_agg(sdata, tid); + + if (WARN(!tid_tx, + "TID %d gone but expected when splicing aggregates from the pending queue\n", + tid)) + return; + + if (!skb_queue_empty(&tid_tx->pending)) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + /* copy over remaining packets */ + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } +} + +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid) +{ + ieee80211_wake_queue_agg(sdata, tid); +} + +static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) +{ + struct tid_ampdu_tx *tid_tx; + + lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_held(&sta->lock); + + tid_tx = 
rcu_dereference_protected_tid_tx(sta, tid); + + /* + * When we get here, the TX path will not be lockless any more wrt. + * aggregation, since the OPERATIONAL bit has long been cleared. + * Thus it will block on getting the lock, if it occurs. So if we + * stop the queue now, we will not get any more packets, and any + * that might be being processed will wait for us here, thereby + * guaranteeing that no packets go to the tid_tx pending queue any + * more. + */ + + ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); + + /* future packets must not find the tid_tx struct any more */ + ieee80211_assign_tid_tx(sta, tid, NULL); + + ieee80211_agg_splice_finish(sta->sdata, tid); + + kfree_rcu(tid_tx, rcu_head); +} + int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx) + enum ieee80211_agg_stop_reason reason) { struct ieee80211_local *local = sta->local; struct tid_ampdu_tx *tid_tx; + enum ieee80211_ampdu_mlme_action action; int ret; lockdep_assert_held(&sta->ampdu_mlme.mtx); + switch (reason) { + case AGG_STOP_DECLINED: + case AGG_STOP_LOCAL_REQUEST: + case AGG_STOP_PEER_REQUEST: + action = IEEE80211_AMPDU_TX_STOP_CONT; + break; + case AGG_STOP_DESTROY_STA: + action = IEEE80211_AMPDU_TX_STOP_FLUSH; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + spin_lock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -167,10 +284,19 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return -ENOENT; } - /* if we're already stopping ignore any new requests to stop */ + /* + * if we're already stopping ignore any new requests to stop + * unless we're destroying it in which case notify the driver + */ if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { spin_unlock_bh(&sta->lock); - return -EALREADY; + if (reason != AGG_STOP_DESTROY_STA) + return -EALREADY; + ret = drv_ampdu_action(local, sta->sdata, + IEEE80211_AMPDU_TX_STOP_FLUSH_CONT, + &sta->sta, tid, NULL, 
0); + WARN_ON_ONCE(ret); + return 0; } if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { @@ -212,11 +338,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ synchronize_net(); - tid_tx->stop_initiator = initiator; - tid_tx->tx_stop = tx; + tid_tx->stop_initiator = reason == AGG_STOP_PEER_REQUEST ? + WLAN_BACK_RECIPIENT : + WLAN_BACK_INITIATOR; + tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; - ret = drv_ampdu_action(local, sta->sdata, - IEEE80211_AMPDU_TX_STOP, + ret = drv_ampdu_action(local, sta->sdata, action, &sta->sta, tid, NULL, 0); /* HW shall not deny going back to legacy */ @@ -227,7 +354,17 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ } - return ret; + /* + * In the case of AGG_STOP_DESTROY_STA, the driver won't + * necessarily call ieee80211_stop_tx_ba_cb(), so this may + * seem like we can leave the tid_tx data pending forever. + * This is true, in a way, but "forever" is only until the + * station struct is actually destroyed. In the meantime, + * leaving it around ensures that we don't transmit packets + * to the driver on this TID which might confuse it. 
+ */ + + return 0; } /* @@ -253,91 +390,18 @@ static void sta_addba_resp_timer_expired(unsigned long data) test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { rcu_read_unlock(); ht_dbg(sta->sdata, - "timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", - tid); + "timer expired on %pM tid %d but we are not (or no longer) expecting addBA response there\n", + sta->sta.addr, tid); return; } - ht_dbg(sta->sdata, "addBA response timer expired on tid %d\n", tid); + ht_dbg(sta->sdata, "addBA response timer expired on %pM tid %d\n", + sta->sta.addr, tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); rcu_read_unlock(); } -static inline int ieee80211_ac_from_tid(int tid) -{ - return ieee802_1d_to_ac[tid & 7]; -} - -/* - * When multiple aggregation sessions on multiple stations - * are being created/destroyed simultaneously, we need to - * refcount the global queue stop caused by that in order - * to not get into a situation where one of the aggregation - * setup or teardown re-enables queues before the other is - * ready to handle that. - * - * These two functions take care of this issue by keeping - * a global "agg_queue_stop" refcount. 
- */ -static void __acquires(agg_queue) -ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) -{ - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - - if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) - ieee80211_stop_queue_by_reason( - &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - __acquire(agg_queue); -} - -static void __releases(agg_queue) -ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) -{ - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - - if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) - ieee80211_wake_queue_by_reason( - &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - __release(agg_queue); -} - -/* - * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish later - */ -static void __acquires(agg_queue) -ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, - struct tid_ampdu_tx *tid_tx, u16 tid) -{ - struct ieee80211_local *local = sdata->local; - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - unsigned long flags; - - ieee80211_stop_queue_agg(sdata, tid); - - if (WARN(!tid_tx, - "TID %d gone but expected when splicing aggregates from the pending queue\n", - tid)) - return; - - if (!skb_queue_empty(&tid_tx->pending)) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - /* copy over remaining packets */ - skb_queue_splice_tail_init(&tid_tx->pending, - &local->pending[queue]); - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - } -} - -static void __releases(agg_queue) -ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid) -{ - ieee80211_wake_queue_agg(sdata, tid); -} - void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -369,7 +433,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) &sta->sta, tid, 
&start_seq_num, 0); if (ret) { ht_dbg(sdata, - "BA request denied - HW unavailable for tid %d\n", tid); + "BA request denied - HW unavailable for %pM tid %d\n", + sta->sta.addr, tid); spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sdata, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); @@ -382,7 +447,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); - ht_dbg(sdata, "activated addBA response timer on tid %d\n", tid); + ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", + sta->sta.addr, tid); spin_lock_bh(&sta->lock); sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; @@ -429,7 +495,8 @@ static void sta_tx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); - ht_dbg(sta->sdata, "tx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n", + sta->sta.addr, (u16)*ptid); ieee80211_stop_tx_ba_session(&sta->sta, *ptid); } @@ -465,7 +532,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { ht_dbg(sdata, - "BA sessions blocked - Denying BA session request\n"); + "BA sessions blocked - Denying BA session request %pM tid %d\n", + sta->sta.addr, tid); return -EINVAL; } @@ -506,8 +574,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { ht_dbg(sdata, - "BA request denied - waiting a grace period after %d failed requests on tid %u\n", - sta->ampdu_mlme.addba_req_num[tid], tid); + "BA request denied - waiting a grace period after %d failed requests on %pM tid %u\n", + sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid); ret = -EBUSY; goto err_unlock_sta; } @@ -516,8 +584,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 
tid, /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { ht_dbg(sdata, - "BA request denied - session is not idle on tid %u\n", - tid); + "BA request denied - session is not idle on %pM tid %u\n", + sta->sta.addr, tid); ret = -EAGAIN; goto err_unlock_sta; } @@ -572,7 +640,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); - ht_dbg(sta->sdata, "Aggregation is on for tid %d\n", tid); + ht_dbg(sta->sdata, "Aggregation is on for %pM tid %d\n", + sta->sta.addr, tid); drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, @@ -660,14 +729,13 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx) + enum ieee80211_agg_stop_reason reason) { int ret; mutex_lock(&sta->ampdu_mlme.mtx); - ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator, tx); + ret = ___ieee80211_stop_tx_ba_session(sta, tid, reason); mutex_unlock(&sta->ampdu_mlme.mtx); @@ -743,7 +811,9 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { - ht_dbg(sdata, "unexpected callback to A-MPDU stop\n"); + ht_dbg(sdata, + "unexpected callback to A-MPDU stop for %pM tid %d\n", + sta->sta.addr, tid); goto unlock_sta; } @@ -751,24 +821,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - /* - * When we get here, the TX path will not be lockless any more wrt. - * aggregation, since the OPERATIONAL bit has long been cleared. - * Thus it will block on getting the lock, if it occurs. 
So if we - * stop the queue now, we will not get any more packets, and any - * that might be being processed will wait for us here, thereby - * guaranteeing that no packets go to the tid_tx pending queue any - * more. - */ - - ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); - - /* future packets must not find the tid_tx struct any more */ - ieee80211_assign_tid_tx(sta, tid, NULL); - - ieee80211_agg_splice_finish(sta->sdata, tid); - - kfree_rcu(tid_tx, rcu_head); + ieee80211_remove_tid_tx(sta, tid); unlock_sta: spin_unlock_bh(&sta->lock); @@ -819,13 +872,15 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { - ht_dbg(sta->sdata, "wrong addBA response token, tid %d\n", tid); + ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n", + sta->sta.addr, tid); goto out; } del_timer_sync(&tid_tx->addba_resp_timer); - ht_dbg(sta->sdata, "switched off addBA timer for tid %d\n", tid); + ht_dbg(sta->sdata, "switched off addBA timer for %pM tid %d\n", + sta->sta.addr, tid); /* * addba_resp_timer may have fired before we got here, and @@ -835,8 +890,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { ht_dbg(sta->sdata, - "got addBA resp for tid %d but we already gave up\n", - tid); + "got addBA resp for %pM tid %d but we already gave up\n", + sta->sta.addr, tid); goto out; } @@ -868,8 +923,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } } else { - ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR, - false); + ___ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED); } out: diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5c61677..a689360 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -164,7 +164,17 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, sta = 
sta_info_get(sdata, mac_addr); else sta = sta_info_get_bss(sdata, mac_addr); - if (!sta) { + /* + * The ASSOC test makes sure the driver is ready to + * receive the key. When wpa_supplicant has roamed + * using FT, it attempts to set the key before + * association has completed, this rejects that attempt + * so it will set the key again after association. + * + * TODO: accept the key if we have a station entry and + * add it to the device after the station. + */ + if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { ieee80211_key_free(sdata->local, key); err = -ENOENT; goto out_unlock; @@ -482,7 +492,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) #ifdef CONFIG_MAC80211_MESH sinfo->filled |= STATION_INFO_LLID | STATION_INFO_PLID | - STATION_INFO_PLINK_STATE; + STATION_INFO_PLINK_STATE | + STATION_INFO_LOCAL_PM | + STATION_INFO_PEER_PM | + STATION_INFO_NONPEER_PM; sinfo->llid = le16_to_cpu(sta->llid); sinfo->plid = le16_to_cpu(sta->plid); @@ -491,6 +504,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->filled |= STATION_INFO_T_OFFSET; sinfo->t_offset = sta->t_offset; } + sinfo->local_pm = sta->local_pm; + sinfo->peer_pm = sta->peer_pm; + sinfo->nonpeer_pm = sta->nonpeer_pm; #endif } @@ -510,6 +526,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); @@ -521,6 +538,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_ASSOC)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if
(test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); } @@ -909,11 +928,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, /* TODO: make hostapd tell us what it wants */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = sdata->local->rx_chains; + sdata->radar_required = params->radar_required; err = ieee80211_vif_use_channel(sdata, &params->chandef, IEEE80211_CHANCTX_SHARED); if (err) return err; + ieee80211_vif_copy_chanctx_to_vlans(sdata, false); /* * Apply control port protocol, this allows us to @@ -930,6 +951,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->vif.bss_conf.dtim_period = params->dtim_period; + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.ssid_len = params->ssid_len; if (params->ssid_len) @@ -1009,7 +1031,16 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); - sta_info_flush(local, sdata); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + sta_info_flush_defer(vlan); + sta_info_flush_defer(sdata); + rcu_barrier(); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + sta_info_flush_cleanup(vlan); + sta_info_flush_cleanup(sdata); + + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); drv_stop_ap(sdata->local, sdata); @@ -1018,6 +1049,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); skb_queue_purge(&sdata->u.ap.ps.bc_buf); + ieee80211_vif_copy_chanctx_to_vlans(sdata, true); ieee80211_vif_release_channel(sdata); return 0; @@ -1067,6 +1099,58 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) netif_rx_ni(skb); } +static
int sta_apply_auth_flags(struct ieee80211_local *local, + struct sta_info *sta, + u32 mask, u32 set) +{ + int ret; + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + set & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); + else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + else + ret = 0; + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && + test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && + test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_NONE); + if (ret) + return ret; + } + + return 0; +} + static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) @@ -1084,52 +1168,20 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask = params->sta_flags_mask; set = params->sta_flags_set; - /* - * In mesh mode, we can clear AUTHENTICATED flag but must - * also make ASSOCIATED follow appropriately for the driver - * API. See also below, after AUTHORIZED changes. 
- */ - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && - !test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); - else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (!(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && - test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_NONE); - if (ret) - return ret; - } + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* + * In mesh mode, ASSOCIATED isn't part of the nl80211 + * API but must follow AUTHENTICATED for driver state. + */ + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } + ret = sta_apply_auth_flags(local, sta, mask, set); + if (ret) + return ret; if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) @@ -1175,10 +1227,11 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->sta.aid = params->aid; /* - * FIXME: updating the following information is racy when this - * function is called from ieee80211_change_station(). 
- * However, all this information should be static so - * maybe we should just reject attemps to change it. + * Some of the following updates would be racy if called on an + * existing station, via ieee80211_change_station(). However, + * all such changes are rejected by cfg80211 except for updates + * changing the supported rates on an existing but not yet used + * TDLS peer. */ if (params->listen_interval >= 0) @@ -1199,36 +1252,62 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (params->ht_capa) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - params->ht_capa, - &sta->sta.ht_cap); + params->ht_capa, sta); if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - params->vht_capa, - &sta->sta.vht_cap); + params->vht_capa, sta); if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) + u32 changed = 0; + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) { switch (params->plink_state) { - case NL80211_PLINK_LISTEN: case NL80211_PLINK_ESTAB: + if (sta->plink_state != NL80211_PLINK_ESTAB) + changed = mesh_plink_inc_estab_count( + sdata); + sta->plink_state = params->plink_state; + + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, + sdata->u.mesh.mshcfg.power_mode); + break; + case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: + case NL80211_PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_CNF_RCVD: + case NL80211_PLINK_HOLDING: + if (sta->plink_state == NL80211_PLINK_ESTAB) + changed = mesh_plink_dec_estab_count( + sdata); sta->plink_state = params->plink_state; + + ieee80211_mps_sta_status_update(sta); + changed |= + ieee80211_mps_local_status_update(sdata); break; default: /* nothing */ break; } - else + } else { switch (params->plink_action) { case PLINK_ACTION_OPEN: - mesh_plink_open(sta); + changed |= mesh_plink_open(sta); break; case PLINK_ACTION_BLOCK: - mesh_plink_block(sta); + 
changed |= mesh_plink_block(sta); break; } + } + + if (params->local_pm) + changed |= + ieee80211_mps_set_sta_local_pm(sta, + params->local_pm); + ieee80211_bss_info_change_notify(sdata, changed); #endif } @@ -1263,6 +1342,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; + /* + * defaults -- if userspace wants something else we'll + * change it accordingly in sta_apply_parameters() + */ sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); @@ -1299,7 +1382,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1307,7 +1389,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, if (mac) return sta_info_destroy_addr_bss(sdata, mac); - sta_info_flush(local, sdata); + sta_info_flush(sdata); return 0; } @@ -1330,9 +1412,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, return -ENOENT; } - /* in station mode, supported rates are only valid with TDLS */ + /* in station mode, some updates are only valid with TDLS */ if (sdata->vif.type == NL80211_IFTYPE_STATION && - params->supported_rates && + (params->supported_rates || params->ht_capa || params->vht_capa || + params->sta_modify_mask || + (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) && !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { mutex_unlock(&local->sta_mtx); return -EINVAL; @@ -1416,13 +1500,13 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } - err = mesh_path_add(dst, sdata); + err = mesh_path_add(sdata, dst); if (err) { rcu_read_unlock(); return err; } - mpath = mesh_path_lookup(dst, sdata); + mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return 
-ENXIO; @@ -1434,12 +1518,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst) + u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (dst) - return mesh_path_del(dst, sdata); + return mesh_path_del(sdata, dst); mesh_path_flush_by_iface(sdata); return 0; @@ -1463,7 +1547,7 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, return -ENOENT; } - mpath = mesh_path_lookup(dst, sdata); + mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1527,7 +1611,7 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); - mpath = mesh_path_lookup(dst, sdata); + mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1548,7 +1632,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); - mpath = mesh_path_lookup_by_idx(idx, sdata); + mpath = mesh_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1613,6 +1697,9 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); + sdata->vif.bss_conf.beacon_int = setup->beacon_interval; + sdata->vif.bss_conf.dtim_period = setup->dtim_period; + return 0; } @@ -1711,6 +1798,14 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) conf->dot11MeshHWMPconfirmationInterval = nconf->dot11MeshHWMPconfirmationInterval; + if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) { + conf->power_mode = nconf->power_mode; + ieee80211_mps_local_status_update(sdata); + } + if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) + conf->dot11MeshAwakeWindowDuration = + 
nconf->dot11MeshAwakeWindowDuration; + ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } @@ -1736,9 +1831,7 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, if (err) return err; - ieee80211_start_mesh(sdata); - - return 0; + return ieee80211_start_mesh(sdata); } static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) @@ -1992,7 +2085,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(rate)); + memcpy(sdata->vif.bss_conf.mcast_rate, rate, + sizeof(int) * IEEE80211_NUM_BANDS); return 0; } @@ -2195,7 +2289,8 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type != NL80211_IFTYPE_STATION) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) @@ -2301,7 +2396,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, INIT_LIST_HEAD(&roc->dependents); /* if there's one pending or we're scanning, queue this one */ - if (!list_empty(&local->roc_list) || local->scanning) + if (!list_empty(&local->roc_list) || + local->scanning || local->radar_detect_enabled) goto out_check_combine; /* if not HW assist, just queue & schedule work */ @@ -2486,7 +2582,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, list_del(&dep->list); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); return 0; } @@ -2526,7 +2622,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, ieee80211_start_next_roc(local); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(found); + 
ieee80211_roc_notify_destroy(found, true); } else { /* work may be pending so use it all the time */ found->abort = true; @@ -2536,6 +2632,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, /* work will clean up etc */ flush_delayed_work(&found->work); + WARN_ON(!found->to_be_freed); + kfree(found); } return 0; @@ -2551,6 +2649,37 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, return ieee80211_cancel_roc(local, cookie, false); } +static int ieee80211_start_radar_detection(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + unsigned long timeout; + int err; + + if (!list_empty(&local->roc_list) || local->scanning) + return -EBUSY; + + /* whatever, but channel contexts should not complain about that one */ + sdata->smps_mode = IEEE80211_SMPS_OFF; + sdata->needed_rx_chains = local->rx_chains; + sdata->radar_required = true; + + mutex_lock(&local->iflist_mtx); + err = ieee80211_vif_use_channel(sdata, chandef, + IEEE80211_CHANCTX_SHARED); + mutex_unlock(&local->iflist_mtx); + if (err) + return err; + + timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + ieee80211_queue_delayed_work(&sdata->local->hw, + &sdata->dfs_cac_timer_work, timeout); + + return 0; +} + static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, unsigned int wait, const u8 *buf, size_t len, @@ -2655,7 +2784,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, goto out_unlock; } - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) IEEE80211_SKB_CB(skb)->hw_queue = local->hw.offchannel_tx_hw_queue; @@ -3157,6 +3287,7 @@ static int 
ieee80211_cfg_get_channel(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_chanctx_conf *chanctx_conf; int ret = -ENODATA; @@ -3165,6 +3296,16 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, if (chanctx_conf) { *chandef = chanctx_conf->def; ret = 0; + } else if (local->open_count > 0 && + local->open_count == local->monitors && + sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (local->use_chanctx) + *chandef = local->monitor_chandef; + else + cfg80211_chandef_create(chandef, + local->_oper_channel, + local->_oper_channel_type); + ret = 0; } rcu_read_unlock(); @@ -3255,4 +3396,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_et_stats = ieee80211_get_et_stats, .get_et_strings = ieee80211_get_et_strings, .get_channel = ieee80211_cfg_get_channel, + .start_radar_detection = ieee80211_start_radar_detection, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 53f0312..931be41 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -4,11 +4,12 @@ #include <linux/nl80211.h> #include <linux/export.h> +#include <linux/rtnetlink.h> #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" -static void ieee80211_change_chandef(struct ieee80211_local *local, +static void ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *chandef) { @@ -48,7 +49,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (!compat) continue; - ieee80211_change_chandef(local, ctx, compat); + ieee80211_change_chanctx(local, ctx, compat); return ctx; } @@ -62,6 +63,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; + u32 changed; int err; lockdep_assert_held(&local->chanctx_mtx); @@ -75,6 +77,13 @@ ieee80211_new_chanctx(struct ieee80211_local *local, 
ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; + /* acquire mutex to prevent idle from changing */ + mutex_lock(&local->mtx); + /* turn idle off *before* setting channel -- some drivers need that */ + changed = ieee80211_idle_off(local); + if (changed) + ieee80211_hw_config(local, changed); + if (!local->use_chanctx) { local->_oper_channel_type = cfg80211_get_chandef_type(chandef); @@ -84,12 +93,19 @@ ieee80211_new_chanctx(struct ieee80211_local *local, err = drv_add_chanctx(local, ctx); if (err) { kfree(ctx); - return ERR_PTR(err); + ctx = ERR_PTR(err); + + ieee80211_recalc_idle(local); + goto out; } } + /* and keep the mutex held until the new chanctx is on the list */ list_add_rcu(&ctx->list, &local->chanctx_list); + out: + mutex_unlock(&local->mtx); + return ctx; } @@ -109,6 +125,10 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, list_del_rcu(&ctx->list); kfree_rcu(ctx, rcu_head); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); } static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, @@ -127,6 +147,11 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ctx->refcount++; ieee80211_recalc_txpower(sdata); + sdata->vif.bss_conf.idle = false; + + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); return 0; } @@ -161,7 +186,7 @@ static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, if (WARN_ON_ONCE(!compat)) return; - ieee80211_change_chandef(local, ctx, compat); + ieee80211_change_chanctx(local, ctx, compat); } static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, @@ -174,11 +199,18 @@ static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ctx->refcount--; rcu_assign_pointer(sdata->vif.chanctx_conf, NULL); + sdata->vif.bss_conf.idle = true; + + if (sdata->vif.type != 
NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + drv_unassign_vif_chanctx(local, sdata, ctx); if (ctx->refcount > 0) { ieee80211_recalc_chanctx_chantype(sdata->local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); + ieee80211_recalc_radar_chanctx(local, ctx); } } @@ -202,6 +234,37 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) ieee80211_free_chanctx(local, ctx); } +void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *chanctx) +{ + struct ieee80211_sub_if_data *sdata; + bool radar_enabled = false; + + lockdep_assert_held(&local->chanctx_mtx); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (sdata->radar_required) { + radar_enabled = true; + break; + } + } + rcu_read_unlock(); + + if (radar_enabled == chanctx->conf.radar_enabled) + return; + + chanctx->conf.radar_enabled = radar_enabled; + local->radar_detect_enabled = chanctx->conf.radar_enabled; + + if (!local->use_chanctx) { + local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + } + + drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); +} + void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx) { @@ -317,6 +380,56 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, } ieee80211_recalc_smps_chanctx(local, ctx); + ieee80211_recalc_radar_chanctx(local, ctx); + out: + mutex_unlock(&local->chanctx_mtx); + return ret; +} + +int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + u32 *changed) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *ctx; + int ret; + + if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + 
IEEE80211_CHAN_DISABLED)) + return -EINVAL; + + mutex_lock(&local->chanctx_mtx); + if (cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef)) { + ret = 0; + goto out; + } + + if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) { + ret = -EINVAL; + goto out; + } + + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) { + ret = -EINVAL; + goto out; + } + + ctx = container_of(conf, struct ieee80211_chanctx, conf); + if (!cfg80211_chandef_compatible(&conf->def, chandef)) { + ret = -EINVAL; + goto out; + } + + sdata->vif.bss_conf.chandef = *chandef; + + ieee80211_recalc_chanctx_chantype(local, ctx); + + *changed |= BSS_CHANGED_BANDWIDTH; + ret = 0; out: mutex_unlock(&local->chanctx_mtx); return ret; @@ -331,6 +444,59 @@ void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->local->chanctx_mtx); } +void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *ap; + struct ieee80211_chanctx_conf *conf; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->bss)) + return; + + ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); + + mutex_lock(&local->chanctx_mtx); + + conf = rcu_dereference_protected(ap->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + rcu_assign_pointer(sdata->vif.chanctx_conf, conf); + mutex_unlock(&local->chanctx_mtx); +} + +void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *vlan; + struct ieee80211_chanctx_conf *conf; + + ASSERT_RTNL(); + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) + return; + + mutex_lock(&local->chanctx_mtx); + + /* + * Check that conf exists, even when clearing this function + * must be called 
with the AP's channel context still there + * as it would otherwise cause VLANs to have an invalid + * channel context pointer for a while, possibly pointing + * to a channel context that has already been freed. + */ + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + WARN_ON(!conf); + + if (clear) + conf = NULL; + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + rcu_assign_pointer(vlan->vif.chanctx_conf, conf); + + mutex_unlock(&local->chanctx_mtx); +} + void ieee80211_iter_chan_contexts_atomic( struct ieee80211_hw *hw, void (*iter)(struct ieee80211_hw *hw, @@ -343,7 +509,8 @@ void ieee80211_iter_chan_contexts_atomic( rcu_read_lock(); list_for_each_entry_rcu(ctx, &local->chanctx_list, list) - iter(hw, &ctx->conf, iter_data); + if (ctx->driver_present) + iter(hw, &ctx->conf, iter_data); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_iter_chan_contexts_atomic); diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 8f383a5..4ccc5ed 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -44,6 +44,12 @@ #define MAC80211_MESH_SYNC_DEBUG 0 #endif +#ifdef CONFIG_MAC80211_MESH_PS_DEBUG +#define MAC80211_MESH_PS_DEBUG 1 +#else +#define MAC80211_MESH_PS_DEBUG 0 +#endif + #ifdef CONFIG_MAC80211_TDLS_DEBUG #define MAC80211_TDLS_DEBUG 1 #else @@ -151,6 +157,10 @@ do { \ _sdata_dbg(MAC80211_MESH_SYNC_DEBUG, \ sdata, fmt, ##__VA_ARGS__) +#define mps_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MESH_PS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + #define tdls_dbg(sdata, fmt, ...) 
\ _sdata_dbg(MAC80211_TDLS_DEBUG, \ sdata, fmt, ##__VA_ARGS__) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 466f4b4..b0e32d6 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -121,8 +121,8 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += snprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n"); if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) sf += snprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n"); - if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) - sf += snprintf(buf + sf, mxln - sf, "NEED_DTIM_PERIOD\n"); + if (local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC) + sf += snprintf(buf + sf, mxln - sf, "NEED_DTIM_BEFORE_ASSOC\n"); if (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT) sf += snprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n"); if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) @@ -151,8 +151,6 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += snprintf(buf + sf, mxln - sf, "AP_LINK_PS\n"); if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW) sf += snprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n"); - if (local->hw.flags & IEEE80211_HW_SCAN_WHILE_IDLE) - sf += snprintf(buf + sf, mxln - sf, "SCAN_WHILE_IDLE\n"); rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); kfree(buf); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cbde5cc..059bbb8 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -515,6 +515,9 @@ IEEE80211_IF_FILE(dot11MeshHWMProotInterval, u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC); IEEE80211_IF_FILE(dot11MeshHWMPconfirmationInterval, u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval, DEC); +IEEE80211_IF_FILE(power_mode, u.mesh.mshcfg.power_mode, DEC); +IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, + u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ @@ -620,6 +623,8 @@ static void add_mesh_config(struct 
ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout); MESHPARAMS_ADD(dot11MeshHWMProotInterval); MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval); + MESHPARAMS_ADD(power_mode); + MESHPARAMS_ADD(dot11MeshAwakeWindowDuration); #undef MESHPARAMS_ADD } #endif diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6fb1168..c7591f7 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -65,7 +65,7 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : "" int res = scnprintf(buf, sizeof(buf), - "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", TEST(AUTH), TEST(ASSOC), TEST(PS_STA), TEST(PS_DRIVER), TEST(AUTHORIZED), TEST(SHORT_PREAMBLE), @@ -74,7 +74,8 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, TEST(UAPSD), TEST(SP), TEST(TDLS_PEER), TEST(TDLS_PEER_AUTH), TEST(4ADDR_EVENT), TEST(INSERTED), TEST(RATE_CONTROL), - TEST(TOFFSET_KNOWN)); + TEST(TOFFSET_KNOWN), TEST(MPSP_OWNER), + TEST(MPSP_RECIPIENT)); #undef TEST return simple_read_from_buffer(userbuf, count, ppos, buf, res); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 698dc7e..ee56d07 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -207,6 +207,17 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, { might_sleep(); + if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED) && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) + return; + + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_MONITOR)) + return; + check_sdata_in_driver(sdata); trace_drv_bss_info_changed(local, sdata, info, changed); @@ -520,6 +531,43 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local 
*local, local->ops->sta_remove_debugfs(&local->hw, &sdata->vif, sta, dir); } + +static inline +void drv_add_interface_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + check_sdata_in_driver(sdata); + + if (!local->ops->add_interface_debugfs) + return; + + local->ops->add_interface_debugfs(&local->hw, &sdata->vif, + sdata->debugfs.dir); +} + +static inline +void drv_remove_interface_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + check_sdata_in_driver(sdata); + + if (!local->ops->remove_interface_debugfs) + return; + + local->ops->remove_interface_debugfs(&local->hw, &sdata->vif, + sdata->debugfs.dir); +} +#else +static inline +void drv_add_interface_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) {} +static inline +void drv_remove_interface_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) {} #endif static inline __must_check @@ -561,7 +609,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local, check_sdata_in_driver(sdata); WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && - sdata->vif.type != NL80211_IFTYPE_ADHOC); + (sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)); trace_drv_sta_rc_update(local, sdata, sta, changed); if (local->ops->sta_rc_update) @@ -837,11 +886,12 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, } static inline void drv_rssi_callback(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, const enum ieee80211_rssi_event event) { - trace_drv_rssi_callback(local, event); + trace_drv_rssi_callback(local, sdata, event); if (local->ops->rssi_callback) - local->ops->rssi_callback(&local->hw, event); + local->ops->rssi_callback(&local->hw, &sdata->vif, event); trace_drv_return_void(local); } @@ -913,6 +963,8 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, if 
(local->ops->add_chanctx) ret = local->ops->add_chanctx(&local->hw, &ctx->conf); trace_drv_return_int(local, ret); + if (!ret) + ctx->driver_present = true; return ret; } @@ -924,6 +976,7 @@ static inline void drv_remove_chanctx(struct ieee80211_local *local, if (local->ops->remove_chanctx) local->ops->remove_chanctx(&local->hw, &ctx->conf); trace_drv_return_void(local); + ctx->driver_present = false; } static inline void drv_change_chanctx(struct ieee80211_local *local, @@ -931,8 +984,10 @@ static inline void drv_change_chanctx(struct ieee80211_local *local, u32 changed) { trace_drv_change_chanctx(local, ctx, changed); - if (local->ops->change_chanctx) + if (local->ops->change_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); local->ops->change_chanctx(&local->hw, &ctx->conf, changed); + } trace_drv_return_void(local); } @@ -945,10 +1000,12 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local, check_sdata_in_driver(sdata); trace_drv_assign_vif_chanctx(local, sdata, ctx); - if (local->ops->assign_vif_chanctx) + if (local->ops->assign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); ret = local->ops->assign_vif_chanctx(&local->hw, &sdata->vif, &ctx->conf); + } trace_drv_return_int(local, ret); return ret; @@ -961,10 +1018,12 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, check_sdata_in_driver(sdata); trace_drv_unassign_vif_chanctx(local, sdata, ctx); - if (local->ops->unassign_vif_chanctx) + if (local->ops->unassign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); local->ops->unassign_vif_chanctx(&local->hw, &sdata->vif, &ctx->conf); + } trace_drv_return_void(local); } @@ -1003,4 +1062,32 @@ static inline void drv_restart_complete(struct ieee80211_local *local) trace_drv_return_void(local); } +static inline void +drv_set_default_unicast_key(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int key_idx) +{ + check_sdata_in_driver(sdata); + + WARN_ON_ONCE(key_idx < -1 || key_idx > 3); + + 
trace_drv_set_default_unicast_key(local, sdata, key_idx); + if (local->ops->set_default_unicast_key) + local->ops->set_default_unicast_key(&local->hw, &sdata->vif, + key_idx); + trace_drv_return_void(local); +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline void drv_ipv6_addr_change(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct inet6_dev *idev) +{ + trace_drv_ipv6_addr_change(local, sdata); + if (local->ops->ipv6_addr_change) + local->ops->ipv6_addr_change(&local->hw, &sdata->vif, idev); + trace_drv_return_void(local); +} +#endif + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a71d891..0db25d4 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -37,6 +37,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, u8 *smask = (u8 *)(&sdata->u.mgd.ht_capa_mask.mcs.rx_mask); int i; + if (!ht_cap->ht_supported) + return; + if (sdata->vif.type != NL80211_IFTYPE_STATION) { /* AP interfaces call this code when adding new stations, * so just silently ignore non station interfaces. @@ -62,6 +65,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40); __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_40); + /* Allow user to disable SGI-20 (SGI-40 is handled above) */ + __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_20); + /* Allow user to disable the max-AMSDU bit. 
*/ __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); @@ -86,22 +92,24 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, } -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, +bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_sta_ht_cap *ht_cap) + const struct ieee80211_ht_cap *ht_cap_ie, + struct sta_info *sta) { + struct ieee80211_sta_ht_cap ht_cap; u8 ampdu_info, tx_mcs_set_cap; int i, max_tx_streams; + bool changed; + enum ieee80211_sta_rx_bandwidth bw; + enum ieee80211_smps_mode smps_mode; - BUG_ON(!ht_cap); - - memset(ht_cap, 0, sizeof(*ht_cap)); + memset(&ht_cap, 0, sizeof(ht_cap)); if (!ht_cap_ie || !sband->ht_cap.ht_supported) - return; + goto apply; - ht_cap->ht_supported = true; + ht_cap.ht_supported = true; /* * The bits listed in this expression should be @@ -109,7 +117,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, * advertises more then we can't use those thus * we mask them out. */ - ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & + ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) & (sband->ht_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING | IEEE80211_HT_CAP_SUP_WIDTH_20_40 | @@ -117,30 +125,31 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_DSSSCCK40)); + /* * The STBC bits are asymmetric -- if we don't have * TX then mask out the peer's RX and vice versa. 
*/ if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) - ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC; + ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC; if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) - ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC; + ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; - ht_cap->ampdu_factor = + ht_cap.ampdu_factor = ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR; - ht_cap->ampdu_density = + ht_cap.ampdu_density = (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; /* own MCS TX capabilities */ tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; /* Copy peer MCS TX capabilities, the driver might need them. */ - ht_cap->mcs.tx_params = ht_cap_ie->mcs.tx_params; + ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params; /* can we TX with MCS rates? */ if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) - return; + goto apply; /* Counting from 0, therefore +1 */ if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF) @@ -158,37 +167,90 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, * - remainder are multiple spatial streams using unequal modulation */ for (i = 0; i < max_tx_streams; i++) - ht_cap->mcs.rx_mask[i] = + ht_cap.mcs.rx_mask[i] = sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; i < IEEE80211_HT_MCS_MASK_LEN; i++) - ht_cap->mcs.rx_mask[i] = + ht_cap.mcs.rx_mask[i] = sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; /* handle MCS rate 32 too */ if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) - ht_cap->mcs.rx_mask[32/8] |= 1; + ht_cap.mcs.rx_mask[32/8] |= 1; + apply: /* * If user has specified capability over-rides, take care * of that here. 
*/ - ieee80211_apply_htcap_overrides(sdata, ht_cap); + ieee80211_apply_htcap_overrides(sdata, &ht_cap); + + changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + + memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + + switch (sdata->vif.bss_conf.chandef.width) { + default: + WARN_ON_ONCE(1); + /* fall through */ + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + bw = IEEE80211_STA_RX_BW_20; + break; + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + break; + } + + if (bw != sta->sta.bandwidth) + changed = true; + sta->sta.bandwidth = bw; + + sta->cur_max_bandwidth = + ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + + switch ((ht_cap.cap & IEEE80211_HT_CAP_SM_PS) + >> IEEE80211_HT_CAP_SM_PS_SHIFT) { + case WLAN_HT_CAP_SM_PS_INVALID: + case WLAN_HT_CAP_SM_PS_STATIC: + smps_mode = IEEE80211_SMPS_STATIC; + break; + case WLAN_HT_CAP_SM_PS_DYNAMIC: + smps_mode = IEEE80211_SMPS_DYNAMIC; + break; + case WLAN_HT_CAP_SM_PS_DISABLED: + smps_mode = IEEE80211_SMPS_OFF; + break; + } + + if (smps_mode != sta->sta.smps_mode) + changed = true; + sta->sta.smps_mode = smps_mode; + + return changed; } -void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx) +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, + enum ieee80211_agg_stop_reason reason) { int i; cancel_work_sync(&sta->ampdu_mlme.work); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR, tx); + __ieee80211_stop_tx_ba_session(sta, i, reason); __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS, tx); + WLAN_REASON_QSTA_LEAVE_QBSS, + reason != AGG_STOP_DESTROY_STA && + reason != AGG_STOP_PEER_REQUEST); } } @@ -245,8 +307,7 @@ void 
ieee80211_ba_session_work(struct work_struct *work) if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) ___ieee80211_stop_tx_ba_session(sta, tid, - WLAN_BACK_INITIATOR, - true); + AGG_STOP_LOCAL_REQUEST); } mutex_unlock(&sta->ampdu_mlme.mtx); } @@ -314,8 +375,7 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, true); else - __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, - true); + __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_PEER_REQUEST); } int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, @@ -387,6 +447,9 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) smps_mode = IEEE80211_SMPS_AUTOMATIC; + if (sdata->u.mgd.driver_smps_mode == smps_mode) + return; + sdata->u.mgd.driver_smps_mode = smps_mode; ieee80211_queue_work(&sdata->local->hw, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 8881fc7..40b71df 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -67,7 +67,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, sdata->local->hw.extra_tx_headroom); if (!ether_addr_equal(ifibss->bssid, bssid)) - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); /* if merging, indicate to driver that we leave the old IBSS */ if (sdata->vif.bss_conf.ibss_joined) { @@ -191,6 +191,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, rcu_assign_pointer(ifibss->presp, skb); + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; bss_change = BSS_CHANGED_BEACON_INT; @@ -227,7 +228,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan, mgmt, skb->len, 0, GFP_KERNEL); - cfg80211_put_bss(bss); + cfg80211_put_bss(local->hw.wiphy, bss); 
netif_carrier_on(sdata->dev); cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL); } @@ -241,6 +242,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 basic_rates; int i, j; u16 beacon_int = cbss->beacon_interval; + const struct cfg80211_bss_ies *ies; + u64 tsf; lockdep_assert_held(&sdata->u.ibss.mtx); @@ -264,13 +267,17 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } } + rcu_read_lock(); + ies = rcu_dereference(cbss->ies); + tsf = ies->tsf; + rcu_read_unlock(); + __ieee80211_sta_join_ibss(sdata, cbss->bssid, beacon_int, cbss->channel, basic_rates, cbss->capability, - cbss->tsf, - false); + tsf, false); } static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, @@ -301,7 +308,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", sdata->vif.addr, addr, sdata->u.ibss.bssid); ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, 0, NULL, 0, - addr, sdata->u.ibss.bssid, NULL, 0, 0); + addr, sdata->u.ibss.bssid, NULL, 0, 0, 0); } return sta; } @@ -421,15 +428,13 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, * has actually implemented this. 
*/ ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, 0, NULL, 0, - mgmt->sa, sdata->u.ibss.bssid, NULL, 0, 0); + mgmt->sa, sdata->u.ibss.bssid, NULL, 0, 0, 0); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, + struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, - struct ieee802_11_elems *elems, - bool beacon) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; int freq; @@ -491,33 +496,26 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (sta && elems->ht_operation && elems->ht_cap_elem && sdata->u.ibss.channel_type != NL80211_CHAN_NO_HT) { /* we both use HT */ - struct ieee80211_sta_ht_cap sta_ht_cap_new; + struct ieee80211_ht_cap htcap_ie; struct cfg80211_chan_def chandef; ieee80211_ht_oper_to_chandef(channel, elems->ht_operation, &chandef); - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - elems->ht_cap_elem, - &sta_ht_cap_new); + memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); /* * fall back to HT20 if we don't use or use * the other extension channel */ - if (chandef.width != NL80211_CHAN_WIDTH_40 || - cfg80211_get_chandef_type(&chandef) != + if (cfg80211_get_chandef_type(&chandef) != sdata->u.ibss.channel_type) - sta_ht_cap_new.cap &= - ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - - if (memcmp(&sta->sta.ht_cap, &sta_ht_cap_new, - sizeof(sta_ht_cap_new))) { - memcpy(&sta->sta.ht_cap, &sta_ht_cap_new, - sizeof(sta_ht_cap_new)); - rates_updated = true; - } + htcap_ie.cap_info &= + cpu_to_le16(~IEEE80211_HT_CAP_SUP_WIDTH_20_40); + + rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap( + sdata, sband, &htcap_ie, sta); } if (sta && rates_updated) { @@ -530,14 +528,14 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, - channel, beacon); + channel); if (!bss) return; cbss = container_of((void *)bss, struct cfg80211_bss, 
priv); - /* was just updated in ieee80211_bss_info_update */ - beacon_timestamp = cbss->tsf; + /* same for beacon and probe response */ + beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); /* check if we need to merge IBSS */ @@ -703,8 +701,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) sdata_info(sdata, "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n"); - ieee80211_request_internal_scan(sdata, - ifibss->ssid, ifibss->ssid_len, NULL); + ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, + NULL); } static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) @@ -802,9 +800,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) IEEE80211_SCAN_INTERVAL)) { sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); - ieee80211_request_internal_scan(sdata, - ifibss->ssid, ifibss->ssid_len, - ifibss->fixed_channel ? ifibss->channel : NULL); + ieee80211_request_ibss_scan(sdata, ifibss->ssid, + ifibss->ssid_len, chan); } else { int interval = IEEE80211_SCAN_INTERVAL; @@ -878,14 +875,21 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } -static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) +static +void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { size_t baselen; struct ieee802_11_elems elems; + BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) != + offsetof(typeof(mgmt->u.beacon), variable)); + + /* + * either beacon or probe_resp but the variable field is at the + * same offset + */ baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; if (baselen > len) return; @@ -893,25 +897,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct 
ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); -} - -static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) -{ - size_t baselen; - struct ieee802_11_elems elems; - - /* Process beacon from the current BSS */ - baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; - if (baselen > len) - return; - - ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); } void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -935,12 +921,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_rx_mgmt_probe_req(sdata, skb); break; case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, - rx_status); - break; case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - rx_status); + ieee80211_rx_mgmt_probe_beacon(sdata, mgmt, skb->len, + rx_status); break; case IEEE80211_STYPE_AUTH: ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); @@ -1118,10 +1101,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, mutex_unlock(&sdata->u.ibss.mtx); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - /* * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is * reserved, but an HT STA shall protect HT transmissions as though @@ -1175,7 +1154,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) if (cbss) { cfg80211_unlink_bss(local->hw.wiphy, cbss); - cfg80211_put_bss(cbss); + cfg80211_put_bss(local->hw.wiphy, cbss); } } @@ -1183,7 +1162,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) memset(ifibss->bssid, 0, 
ETH_ALEN); ifibss->ssid_len = 0; - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); spin_lock_bh(&ifibss->incomplete_lock); while (!list_empty(&ifibss->incomplete_stations)) { @@ -1206,6 +1185,8 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); synchronize_rcu(); @@ -1217,9 +1198,5 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->u.ibss.mtx); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&local->mtx); - return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 42d0d02..5672533 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -86,25 +86,11 @@ struct ieee80211_fragment_entry { struct ieee80211_bss { - /* don't want to look up all the time */ - size_t ssid_len; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - - u32 device_ts; - - u8 dtim_period; + u32 device_ts_beacon, device_ts_presp; bool wmm_used; bool uapsd_supported; - unsigned long last_probe_resp; - -#ifdef CONFIG_MAC80211_MESH - u8 *mesh_id; - size_t mesh_id_len; - u8 *mesh_cfg; -#endif - #define IEEE80211_MAX_SUPP_RATES 32 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; size_t supp_rates_len; @@ -140,7 +126,6 @@ enum ieee80211_bss_corrupt_data_flags { /** * enum ieee80211_valid_data_flags - BSS valid data flags - * @IEEE80211_BSS_VALID_DTIM: DTIM data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE * @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE @@ -151,37 +136,11 @@ enum 
ieee80211_bss_corrupt_data_flags { * beacon/probe response. */ enum ieee80211_bss_valid_data_flags { - IEEE80211_BSS_VALID_DTIM = BIT(0), IEEE80211_BSS_VALID_WMM = BIT(1), IEEE80211_BSS_VALID_RATES = BIT(2), IEEE80211_BSS_VALID_ERP = BIT(3) }; -static inline u8 *bss_mesh_cfg(struct ieee80211_bss *bss) -{ -#ifdef CONFIG_MAC80211_MESH - return bss->mesh_cfg; -#endif - return NULL; -} - -static inline u8 *bss_mesh_id(struct ieee80211_bss *bss) -{ -#ifdef CONFIG_MAC80211_MESH - return bss->mesh_id; -#endif - return NULL; -} - -static inline u8 bss_mesh_id_len(struct ieee80211_bss *bss) -{ -#ifdef CONFIG_MAC80211_MESH - return bss->mesh_id_len; -#endif - return 0; -} - - typedef unsigned __bitwise__ ieee80211_tx_result; #define TX_CONTINUE ((__force ieee80211_tx_result) 0u) #define TX_DROP ((__force ieee80211_tx_result) 1u) @@ -350,6 +309,7 @@ struct ieee80211_roc_work { struct ieee80211_channel *chan; bool started, abort, hw_begun, notified; + bool to_be_freed; unsigned long hw_start_time; @@ -384,6 +344,7 @@ struct ieee80211_mgd_auth_data { u8 key[WLAN_KEY_LEN_WEP104]; u8 key_len, key_idx; bool done; + bool timeout_started; u16 sae_trans, sae_status; size_t data_len; @@ -403,12 +364,14 @@ struct ieee80211_mgd_assoc_data { u8 ssid_len; u8 supp_rates_len; bool wmm, uapsd; - bool have_beacon; - bool sent_assoc; + bool have_beacon, need_beacon; bool synced; + bool timeout_started; u8 ap_ht_param; + struct ieee80211_vht_cap ap_vht_cap; + size_t ie_len; u8 ie[]; }; @@ -427,6 +390,7 @@ struct ieee80211_if_managed { unsigned long probe_timeout; int probe_send_count; bool nullfunc_failed; + bool connection_loss; struct mutex mtx; struct cfg80211_bss *associated; @@ -440,6 +404,7 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off powersave */ + u8 dtim_period; enum ieee80211_smps_mode req_smps, /* requested smps mode */ 
driver_smps_mode; /* smps mode request */ @@ -450,6 +415,10 @@ struct ieee80211_if_managed { bool beacon_crc_valid; u32 beacon_crc; + bool status_acked; + bool status_received; + __le16 status_fc; + enum { IEEE80211_MFP_DISABLED, IEEE80211_MFP_OPTIONAL, @@ -612,6 +581,9 @@ struct ieee80211_if_mesh { u32 mesh_seqnum; bool accepting_plinks; int num_gates; + struct beacon_data __rcu *beacon; + /* just protects beacon updates for now */ + struct mutex mtx; const u8 *ie; u8 ie_len; enum { @@ -624,6 +596,11 @@ struct ieee80211_if_mesh { s64 sync_offset_clockdrift_max; spinlock_t sync_offset_lock; bool adjusting_tbtt; + /* mesh power save */ + enum nl80211_mesh_power_mode nonpeer_pm; + int ps_peers_light_sleep; + int ps_peers_deep_sleep; + struct ps_data ps; }; #ifdef CONFIG_MAC80211_MESH @@ -662,10 +639,13 @@ enum ieee80211_sub_if_data_flags { * change handling while the interface is up * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel * mode, so queues are stopped + * @SDATA_STATE_OFFCHANNEL_BEACON_STOPPED: Beaconing was stopped due + * to offchannel, reset when offchannel returns */ enum ieee80211_sdata_state_bits { SDATA_STATE_RUNNING, SDATA_STATE_OFFCHANNEL, + SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, }; /** @@ -688,6 +668,7 @@ struct ieee80211_chanctx { enum ieee80211_chanctx_mode mode; int refcount; + bool driver_present; struct ieee80211_chanctx_conf conf; }; @@ -714,9 +695,6 @@ struct ieee80211_sub_if_data { char name[IFNAMSIZ]; - /* to detect idle changes */ - bool old_idle; - /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; @@ -744,14 +722,15 @@ struct ieee80211_sub_if_data { struct work_struct work; struct sk_buff_head skb_queue; - bool arp_filter_state; - u8 needed_rx_chains; enum ieee80211_smps_mode smps_mode; int user_power_level; /* in dBm */ int ap_power_level; /* in dBm */ + bool radar_required; + struct delayed_work dfs_cac_timer_work; + /* * AP 
this belongs to: self in AP mode and * corresponding AP in VLAN mode, NULL for @@ -773,6 +752,10 @@ struct ieee80211_sub_if_data { u32 mntr_flags; } u; + spinlock_t cleanup_stations_lock; + struct list_head cleanup_stations; + struct work_struct cleanup_stations_wk; + #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *dir; @@ -782,6 +765,11 @@ struct ieee80211_sub_if_data { struct dentry *default_mgmt_key; } debugfs; #endif + +#ifdef CONFIG_PM + struct ieee80211_bss_conf suspend_bss_conf; +#endif + /* must be last, dynamically sized area in this! */ struct ieee80211_vif vif; }; @@ -830,6 +818,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_AGGREGATION, IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, }; #ifdef CONFIG_MAC80211_LEDS @@ -962,6 +951,10 @@ struct ieee80211_local { /* wowlan is enabled -- don't reconfig on resume */ bool wowlan; + /* DFS/radar detection is enabled */ + bool radar_detect_enabled; + struct work_struct radar_detected_work; + /* number of RX chains the hardware has */ u8 rx_chains; @@ -976,14 +969,7 @@ struct ieee80211_local { struct sk_buff_head skb_queue; struct sk_buff_head skb_queue_unreliable; - /* - * Internal FIFO queue which is shared between multiple rx path - * stages. Its main task is to provide a serialization mechanism, - * so all rx handlers can enjoy having exclusive access to their - * private data structures. - */ - struct sk_buff_head rx_skb_queue; - bool running_rx_handler; /* protected by rx_skb_queue.lock */ + spinlock_t rx_path_lock; /* Station data */ /* @@ -1117,14 +1103,13 @@ struct ieee80211_local { struct timer_list dynamic_ps_timer; struct notifier_block network_latency_notifier; struct notifier_block ifa_notifier; + struct notifier_block ifa6_notifier; /* * The dynamic ps timeout configured from user space via WEXT - * this will override whatever chosen by mac80211 internally. 
*/ int dynamic_ps_forced_timeout; - int dynamic_ps_user_timeout; - bool disable_dynamic_ps; int user_power_level; /* in dBm, for all interfaces */ @@ -1182,40 +1167,41 @@ struct ieee80211_ra_tid { /* Parsed Information Elements */ struct ieee802_11_elems { - u8 *ie_start; + const u8 *ie_start; size_t total_len; /* pointers to IEs */ - u8 *ssid; - u8 *supp_rates; - u8 *fh_params; - u8 *ds_params; - u8 *cf_params; - struct ieee80211_tim_ie *tim; - u8 *ibss_params; - u8 *challenge; - u8 *wpa; - u8 *rsn; - u8 *erp_info; - u8 *ext_supp_rates; - u8 *wmm_info; - u8 *wmm_param; - struct ieee80211_ht_cap *ht_cap_elem; - struct ieee80211_ht_operation *ht_operation; - struct ieee80211_vht_cap *vht_cap_elem; - struct ieee80211_vht_operation *vht_operation; - struct ieee80211_meshconf_ie *mesh_config; - u8 *mesh_id; - u8 *peering; - u8 *preq; - u8 *prep; - u8 *perr; - struct ieee80211_rann_ie *rann; - struct ieee80211_channel_sw_ie *ch_switch_ie; - u8 *country_elem; - u8 *pwr_constr_elem; - u8 *quiet_elem; /* first quite element */ - u8 *timeout_int; + const u8 *ssid; + const u8 *supp_rates; + const u8 *fh_params; + const u8 *ds_params; + const u8 *cf_params; + const struct ieee80211_tim_ie *tim; + const u8 *ibss_params; + const u8 *challenge; + const u8 *rsn; + const u8 *erp_info; + const u8 *ext_supp_rates; + const u8 *wmm_info; + const u8 *wmm_param; + const struct ieee80211_ht_cap *ht_cap_elem; + const struct ieee80211_ht_operation *ht_operation; + const struct ieee80211_vht_cap *vht_cap_elem; + const struct ieee80211_vht_operation *vht_operation; + const struct ieee80211_meshconf_ie *mesh_config; + const u8 *mesh_id; + const u8 *peering; + const __le16 *awake_window; + const u8 *preq; + const u8 *prep; + const u8 *perr; + const struct ieee80211_rann_ie *rann; + const struct ieee80211_channel_sw_ie *ch_switch_ie; + const u8 *country_elem; + const u8 *pwr_constr_elem; + const u8 *quiet_elem; /* first quite element */ + const u8 *timeout_int; + const u8 *opmode_notif; /* 
length of them, respectively */ u8 ssid_len; @@ -1226,7 +1212,6 @@ struct ieee802_11_elems { u8 tim_len; u8 ibss_params_len; u8 challenge_len; - u8 wpa_len; u8 rsn_len; u8 erp_info_len; u8 ext_supp_rates_len; @@ -1295,10 +1280,10 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp); +void +ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss, u64 timestamp); void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); @@ -1307,6 +1292,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata); +void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, + __le16 fc, bool acked); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); @@ -1329,9 +1316,9 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* scan/BSS handling */ void ieee80211_scan_work(struct work_struct *work); -int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, - const u8 *ssid, u8 ssid_len, - struct ieee80211_channel *chan); +int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, + const u8 *ssid, u8 ssid_len, + struct ieee80211_channel *chan); int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, 
struct cfg80211_scan_request *req); void ieee80211_scan_cancel(struct ieee80211_local *local); @@ -1345,8 +1332,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - struct ieee80211_channel *channel, - bool beacon); + struct ieee80211_channel *channel); void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); @@ -1357,14 +1343,12 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); void ieee80211_sched_scan_stopped_work(struct work_struct *work); /* off-channel helpers */ -void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, - bool offchannel_ps_enable); -void ieee80211_offchannel_return(struct ieee80211_local *local, - bool offchannel_ps_disable); +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local); +void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata); -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc); +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free); void ieee80211_sw_roc_work(struct work_struct *work); void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); @@ -1378,6 +1362,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type); void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); void ieee80211_remove_interfaces(struct ieee80211_local *local); +u32 ieee80211_idle_off(struct ieee80211_local *local); void ieee80211_recalc_idle(struct ieee80211_local *local); void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset); @@ -1405,10 +1390,10 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data 
*sdata, struct ieee80211_sta_ht_cap *ht_cap); -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, +bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_sta_ht_cap *ht_cap); + const struct ieee80211_ht_cap *ht_cap_ie, + struct sta_info *sta); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code); @@ -1421,7 +1406,8 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); -void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx); +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, + enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len); @@ -1435,11 +1421,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx); + enum ieee80211_agg_stop_reason reason); int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx); + enum ieee80211_agg_stop_reason reason); void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); void ieee80211_ba_session_work(struct work_struct *work); @@ -1449,10 +1433,17 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs); /* VHT */ -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct 
ieee80211_vht_cap *vht_cap_ie, - struct ieee80211_sta_vht_cap *vht_cap); +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta); +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); +void ieee80211_sta_set_rx_nss(struct sta_info *sta); +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band, bool nss_only); + /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, @@ -1570,8 +1561,9 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *bssid, - const u8 *da, const u8 *key, u8 key_len, u8 key_idx); + const u8 *extra, size_t extra_len, const u8 *bssid, + const u8 *da, const u8 *key, u8 key_len, u8 key_idx, + u32 tx_flags); void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *bssid, u16 stype, u16 reason, bool send_frame, u8 *frame_buf); @@ -1588,7 +1580,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck, + u32 ratemask, bool directed, u32 tx_flags, struct ieee80211_channel *channel, bool scan); void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, @@ -1620,17 +1612,31 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, /* channel management */ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct 
cfg80211_chan_def *chandef); int __must_check ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode mode); +int __must_check +ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + u32 *changed); void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); +void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); +void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear); void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); +void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *chanctx); + +void ieee80211_dfs_cac_timer(unsigned long data); +void ieee80211_dfs_cac_timer_work(struct work_struct *work); +void ieee80211_dfs_cac_cancel(struct ieee80211_local *local); +void ieee80211_dfs_radar_detected_work(struct work_struct *work); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 09a80b5..9ed49ad 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -78,8 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -static u32 ieee80211_idle_off(struct ieee80211_local *local, - const char *reason) +static u32 __ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; @@ -88,7 +87,7 @@ static u32 ieee80211_idle_off(struct ieee80211_local *local, return IEEE80211_CONF_CHANGE_IDLE; } -static u32 ieee80211_idle_on(struct ieee80211_local *local) +static u32 __ieee80211_idle_on(struct ieee80211_local *local) { if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; @@ -99,125 +98,62 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) return 
IEEE80211_CONF_CHANGE_IDLE; } -static u32 __ieee80211_recalc_idle(struct ieee80211_local *local) +static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, + bool force_active) { - struct ieee80211_sub_if_data *sdata; - int count = 0; - bool working = false, scanning = false; + bool working = false, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; struct ieee80211_roc_work *roc; -#ifdef CONFIG_PROVE_LOCKING - WARN_ON(debug_locks && !lockdep_rtnl_is_held() && - !lockdep_is_held(&local->iflist_mtx)); -#endif lockdep_assert_held(&local->mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) { - sdata->vif.bss_conf.idle = true; - continue; - } - - sdata->old_idle = sdata->vif.bss_conf.idle; - - /* do not count disabled managed interfaces */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated && - !sdata->u.mgd.auth_data && - !sdata->u.mgd.assoc_data) { - sdata->vif.bss_conf.idle = true; - continue; - } - /* do not count unused IBSS interfaces */ - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - !sdata->u.ibss.ssid_len) { - sdata->vif.bss_conf.idle = true; - continue; - } - - if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) - continue; - - /* count everything else */ - sdata->vif.bss_conf.idle = false; - count++; - } + active = force_active || + !list_empty(&local->chanctx_list) || + local->monitors; if (!local->ops->remain_on_channel) { list_for_each_entry(roc, &local->roc_list, list) { working = true; - roc->sdata->vif.bss_conf.idle = false; + break; } } - sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); - if (sdata && !(local->hw.flags & IEEE80211_HW_SCAN_WHILE_IDLE)) { - scanning = true; - sdata->vif.bss_conf.idle = false; - } - - list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) 
- continue; - if (sdata->old_idle == sdata->vif.bss_conf.idle) - continue; - if (!ieee80211_sdata_running(sdata)) - continue; - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); - } + scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) || + test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning); if (working || scanning) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK; - if (count) + if (active) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); - if (working) - return ieee80211_idle_off(local, "working"); - if (scanning) - return ieee80211_idle_off(local, "scanning"); - if (!count) - return ieee80211_idle_on(local); - else - return ieee80211_idle_off(local, "in use"); + if (working || scanning || active) + return __ieee80211_idle_off(local); + return __ieee80211_idle_on(local); +} - return 0; +u32 ieee80211_idle_off(struct ieee80211_local *local) +{ + return __ieee80211_recalc_idle(local, true); } void ieee80211_recalc_idle(struct ieee80211_local *local) { - u32 chg; - - mutex_lock(&local->iflist_mtx); - chg = __ieee80211_recalc_idle(local); - mutex_unlock(&local->iflist_mtx); - if (chg) - ieee80211_hw_config(local, chg); + u32 change = __ieee80211_recalc_idle(local, false); + if (change) + ieee80211_hw_config(local, change); } static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) { - int meshhdrlen; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - meshhdrlen = (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ? 5 : 0; - - /* FIX: what would be proper limits for MTU? - * This interface uses 802.3 frames. 
*/ - if (new_mtu < 256 || - new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) { + if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN) return -EINVAL; - } dev->mtu = new_mtu; return 0; @@ -369,7 +305,8 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata) } } - if ((sdata->vif.type != NL80211_IFTYPE_AP) || + if ((sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) || !(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) { sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE; return 0; @@ -423,21 +360,19 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata) static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - int ret = 0; + int ret; if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return 0; - mutex_lock(&local->iflist_mtx); + ASSERT_RTNL(); if (local->monitor_sdata) - goto out_unlock; + return 0; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL); - if (!sdata) { - ret = -ENOMEM; - goto out_unlock; - } + if (!sdata) + return -ENOMEM; /* set up data */ sdata->local = local; @@ -451,13 +386,13 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (WARN_ON(ret)) { /* ok .. stupid driver, it asked for this! 
*/ kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_check_queues(sdata); if (ret) { kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef, @@ -465,13 +400,14 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (ret) { drv_remove_interface(local, sdata); kfree(sdata); - goto out_unlock; + return ret; } + mutex_lock(&local->iflist_mtx); rcu_assign_pointer(local->monitor_sdata, sdata); - out_unlock: mutex_unlock(&local->iflist_mtx); - return ret; + + return 0; } static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) @@ -481,14 +417,20 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return; + ASSERT_RTNL(); + mutex_lock(&local->iflist_mtx); sdata = rcu_dereference_protected(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx)); - if (!sdata) - goto out_unlock; + if (!sdata) { + mutex_unlock(&local->iflist_mtx); + return; + } rcu_assign_pointer(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + synchronize_net(); ieee80211_vif_release_channel(sdata); @@ -496,8 +438,6 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) drv_remove_interface(local, sdata); kfree(sdata); - out_unlock: - mutex_unlock(&local->iflist_mtx); } /* @@ -586,11 +526,13 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: - /* no need to tell driver, but set carrier */ - if (rtnl_dereference(sdata->bss->beacon)) + /* no need to tell driver, but set carrier and chanctx */ + if (rtnl_dereference(sdata->bss->beacon)) { + ieee80211_vif_vlan_copy_chanctx(sdata); netif_carrier_on(dev); - else + } else { netif_carrier_off(dev); + } break; case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { @@ -613,6 +555,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool 
coming_up) ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); netif_carrier_on(dev); break; @@ -628,6 +573,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) goto err_del_interface; } + drv_add_interface_debugfs(local, sdata); + if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; local->fif_probe_req++; @@ -701,10 +648,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_inc(&local->iff_promiscs); - mutex_lock(&local->mtx); - hw_reconf_flags |= __ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - if (coming_up) local->open_count++; @@ -754,7 +697,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; - int i; + int i, flushed; + struct ps_data *ps; clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -769,6 +713,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_roc_purge(sdata); + if (sdata->vif.type == NL80211_IFTYPE_STATION) + ieee80211_mgd_stop(sdata); + /* * Remove all stations associated with this interface. * @@ -779,11 +726,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * (because if we remove a STA after ops->remove_interface() * the driver will have removed the vif info already!) * - * This is relevant only in AP, WDS and mesh modes, since in - * all other modes we've already removed all stations when - * disconnecting etc. + * This is relevant only in WDS mode, in all other modes we've + * already removed all stations when disconnecting or similar, + * so warn otherwise. + * + * We call sta_info_flush_cleanup() later, to combine RCU waits. 
*/ - sta_info_flush(local, sdata); + flushed = sta_info_flush_defer(sdata); + WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || + (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); /* * Don't count this interface for promisc/allmulti while it @@ -820,6 +771,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->recalc_smps); + cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + mutex_lock(&local->iflist_mtx); + ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->iflist_mtx); + cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_ABORTED, + GFP_KERNEL); + } + /* APs need special treatment */ if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_sub_if_data *vlan, *tmpsdata; @@ -829,8 +790,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, u.vlan.list) dev_close(vlan->dev); WARN_ON(!list_empty(&sdata->u.ap.vlans)); - } else if (sdata->vif.type == NL80211_IFTYPE_STATION) { - ieee80211_mgd_stop(sdata); + } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + /* remove all packets in parent bc_buf pointing to this dev */ + ps = &sdata->bss->ps; + + spin_lock_irqsave(&ps->bc_buf.lock, flags); + skb_queue_walk_safe(&ps->bc_buf, skb, tmp) { + if (skb->dev == sdata->dev) { + __skb_unlink(skb, &ps->bc_buf); + local->total_ps_buffered--; + ieee80211_free_txskb(&local->hw, skb); + } + } + spin_unlock_irqrestore(&ps->bc_buf.lock, flags); } if (going_down) @@ -839,6 +811,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: list_del(&sdata->u.vlan.list); + rcu_assign_pointer(sdata->vif.chanctx_conf, NULL); /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: @@ -856,6 +829,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_adjust_monitor_flags(sdata, -1); ieee80211_configure_filter(local); + mutex_lock(&local->mtx); + 
ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); break; case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ @@ -865,19 +841,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->work); /* * When we get here, the interface is marked down. - * Call rcu_barrier() to wait both for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU, and - * for the sta free call_rcu callbacks. + * + * sta_info_flush_cleanup() requires rcu_barrier() + * first to wait for the station call_rcu() calls + * to complete, here we need at least sychronize_rcu() + * it to wait for the RX path in case it is using the + * interface and enqueuing frames at this very time on + * another CPU. */ rcu_barrier(); - - /* - * free_sta_rcu() enqueues a work for the actual - * sta cleanup, so we need to flush it while - * sdata is still valid. - */ - flush_workqueue(local->workqueue); + sta_info_flush_cleanup(sdata); skb_queue_purge(&sdata->skb_queue); @@ -887,16 +860,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, */ ieee80211_free_keys(sdata); + drv_remove_interface_debugfs(local, sdata); + if (going_down) drv_remove_interface(local, sdata); } sdata->bss = NULL; - mutex_lock(&local->mtx); - hw_reconf_flags |= __ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - ieee80211_recalc_ps(local, -1); if (local->open_count == 0) { @@ -976,7 +947,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) */ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = sdata->local; int flushed; int i; @@ -992,7 +962,7 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); - flushed = sta_info_flush(local, sdata); + flushed = sta_info_flush(sdata); WARN_ON(flushed); } @@ -1233,6 +1203,7 @@ static void 
ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps.bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); + sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_P2P_CLIENT: type = NL80211_IFTYPE_STATION; @@ -1240,9 +1211,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.p2p = true; /* fall through */ case NL80211_IFTYPE_STATION: + sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; ieee80211_sta_setup_sdata(sdata); break; case NL80211_IFTYPE_ADHOC: + sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; ieee80211_ibss_setup_sdata(sdata); break; case NL80211_IFTYPE_MESH_POINT: @@ -1256,8 +1229,12 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_WDS: + sdata->vif.bss_conf.bssid = NULL; + break; case NL80211_IFTYPE_AP_VLAN: + break; case NL80211_IFTYPE_P2P_DEVICE: + sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -1498,6 +1475,15 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); } +static void ieee80211_cleanup_sdata_stas_wk(struct work_struct *wk) +{ + struct ieee80211_sub_if_data *sdata; + + sdata = container_of(wk, struct ieee80211_sub_if_data, cleanup_stations_wk); + + ieee80211_cleanup_sdata_stas(sdata); +} + int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) @@ -1564,15 +1550,18 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* initialise type-independent data */ sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; -#ifdef CONFIG_INET - sdata->arp_filter_state = true; -#endif for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); INIT_LIST_HEAD(&sdata->key_list); + 
spin_lock_init(&sdata->cleanup_stations_lock); + INIT_LIST_HEAD(&sdata->cleanup_stations); + INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk); + INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work, + ieee80211_dfs_cac_timer_work); + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[i]; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 619c5d6..ef252eb 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -204,8 +204,11 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, if (idx >= 0 && idx < NUM_DEFAULT_KEYS) key = key_mtx_dereference(sdata->local, sdata->keys[idx]); - if (uni) + if (uni) { rcu_assign_pointer(sdata->default_unicast_key, key); + drv_set_default_unicast_key(sdata->local, sdata, idx); + } + if (multi) rcu_assign_pointer(sdata->default_multicast_key, key); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1b087ff..1a8591b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -23,6 +23,7 @@ #include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> +#include <net/addrconf.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -33,8 +34,6 @@ #include "cfg.h" #include "debugfs.h" -static struct lock_class_key ieee80211_rx_skb_queue_class; - void ieee80211_configure_filter(struct ieee80211_local *local) { u64 mc; @@ -207,76 +206,10 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed) { struct ieee80211_local *local = sdata->local; - static const u8 zero[ETH_ALEN] = { 0 }; if (!changed) return; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; - } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; - else if (sdata->vif.type == NL80211_IFTYPE_AP) - sdata->vif.bss_conf.bssid = sdata->vif.addr; - else if (sdata->vif.type == NL80211_IFTYPE_WDS) - 
sdata->vif.bss_conf.bssid = NULL; - else if (ieee80211_vif_is_mesh(&sdata->vif)) { - sdata->vif.bss_conf.bssid = zero; - } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { - sdata->vif.bss_conf.bssid = sdata->vif.addr; - WARN_ONCE(changed & ~(BSS_CHANGED_IDLE), - "P2P Device BSS changed %#x", changed); - } else { - WARN_ON(1); - return; - } - - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_WDS: - case NL80211_IFTYPE_MESH_POINT: - break; - default: - /* do not warn to simplify caller in scan.c */ - changed &= ~BSS_CHANGED_BEACON_ENABLED; - if (WARN_ON(changed & BSS_CHANGED_BEACON)) - return; - break; - } - - if (changed & BSS_CHANGED_BEACON_ENABLED) { - if (local->quiescing || !ieee80211_sdata_running(sdata) || - test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) { - sdata->vif.bss_conf.enable_beacon = false; - } else { - /* - * Beacon should be enabled, but AP mode must - * check whether there is a beacon configured. - */ - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.ap.beacon; - break; - case NL80211_IFTYPE_ADHOC: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.ibss.presp; - break; -#ifdef CONFIG_MAC80211_MESH - case NL80211_IFTYPE_MESH_POINT: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.mesh.mesh_id_len; - break; -#endif - default: - /* not reached */ - WARN_ON(1); - break; - } - } - } - drv_bss_info_changed(local, sdata, &sdata->vif.bss_conf, changed); } @@ -415,27 +348,19 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, /* Copy the addresses to the bss_conf list */ ifa = idev->ifa_list; - while (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN && ifa) { - bss_conf->arp_addr_list[c] = ifa->ifa_address; + while (ifa) { + if (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN) + bss_conf->arp_addr_list[c] = ifa->ifa_address; ifa = ifa->ifa_next; c++; } - /* If not all addresses fit the list, disable filtering */ - if (ifa) { - 
sdata->arp_filter_state = false; - c = 0; - } else { - sdata->arp_filter_state = true; - } bss_conf->arp_addr_cnt = c; /* Configure driver only if associated (which also implies it is up) */ - if (ifmgd->associated) { - bss_conf->arp_filter_enabled = sdata->arp_filter_state; + if (ifmgd->associated) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - } mutex_unlock(&ifmgd->mtx); @@ -443,6 +368,37 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, } #endif +#if IS_ENABLED(CONFIG_IPV6) +static int ieee80211_ifa6_changed(struct notifier_block *nb, + unsigned long data, void *arg) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)arg; + struct inet6_dev *idev = ifa->idev; + struct net_device *ndev = ifa->idev->dev; + struct ieee80211_local *local = + container_of(nb, struct ieee80211_local, ifa6_notifier); + struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct ieee80211_sub_if_data *sdata; + + /* Make sure it's our interface that got changed */ + if (!wdev || wdev->wiphy != local->hw.wiphy) + return NOTIFY_DONE; + + sdata = IEEE80211_DEV_TO_SUB_IF(ndev); + + /* + * For now only support station mode. This is mostly because + * doing AP would have to handle AP_VLAN in some way ... 
+ */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return NOTIFY_DONE; + + drv_ipv6_addr_change(local, sdata, idev); + + return NOTIFY_DONE; +} +#endif + static int ieee80211_napi_poll(struct napi_struct *napi, int budget) { struct ieee80211_local *local = @@ -537,6 +493,7 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { .cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_MAX_AMSDU | + IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40), .mcs = { .rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff, @@ -544,6 +501,11 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { }, }; +static const u8 extended_capabilities[] = { + 0, 0, 0, 0, 0, 0, 0, + WLAN_EXT_CAPA8_OPMODE_NOTIF, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -600,6 +562,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, WIPHY_FLAG_REPORTS_OBSS | WIPHY_FLAG_OFFCHAN_TX; + wiphy->extended_capabilities = extended_capabilities; + wiphy->extended_capabilities_mask = extended_capabilities; + wiphy->extended_capabilities_len = ARRAY_SIZE(extended_capabilities); + if (ops->remain_on_channel) wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; @@ -653,25 +619,19 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mutex_init(&local->key_mtx); spin_lock_init(&local->filter_lock); + spin_lock_init(&local->rx_path_lock); spin_lock_init(&local->queue_stop_reason_lock); INIT_LIST_HEAD(&local->chanctx_list); mutex_init(&local->chanctx_mtx); - /* - * The rx_skb_queue is only accessed from tasklets, - * but other SKB queues are used from within IRQ - * context. Therefore, this one needs a different - * locking class so our direct, non-irq-safe use of - * the queue's lock doesn't throw lockdep warnings. 
- */ - skb_queue_head_init_class(&local->rx_skb_queue, - &ieee80211_rx_skb_queue_class); - INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); INIT_WORK(&local->restart_work, ieee80211_restart_work); + INIT_WORK(&local->radar_detected_work, + ieee80211_dfs_radar_detected_work); + INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); local->smps_mode = IEEE80211_SMPS_OFF; @@ -687,8 +647,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, spin_lock_init(&local->ack_status_lock); idr_init(&local->ack_status_frames); - /* preallocate at least one entry */ - idr_pre_get(&local->ack_status_frames, GFP_KERNEL); sta_info_init(local); @@ -747,9 +705,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -EINVAL; #endif - if ((hw->flags & IEEE80211_HW_SCAN_WHILE_IDLE) && !local->ops->hw_scan) - return -EINVAL; - if (!local->use_chanctx) { for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { const struct ieee80211_iface_combination *comb; @@ -767,6 +722,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS)) return -EINVAL; + + /* DFS currently not supported with channel context drivers */ + for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { + const struct ieee80211_iface_combination *comb; + + comb = &local->hw.wiphy->iface_combinations[i]; + + if (comb->radar_detect_widths) + return -EINVAL; + } } /* Only HW csum features are currently compatible with mac80211 */ @@ -1049,12 +1014,25 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_ifa; #endif +#if IS_ENABLED(CONFIG_IPV6) + local->ifa6_notifier.notifier_call = ieee80211_ifa6_changed; + result = register_inet6addr_notifier(&local->ifa6_notifier); + if (result) + goto fail_ifa6; +#endif + netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll, local->hw.napi_weight); return 0; +#if IS_ENABLED(CONFIG_IPV6) + fail_ifa6: #ifdef CONFIG_INET + 
unregister_inetaddr_notifier(&local->ifa_notifier); +#endif +#endif +#if defined(CONFIG_INET) || defined(CONFIG_IPV6) fail_ifa: pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); @@ -1090,6 +1068,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) #ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); #endif +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&local->ifa6_notifier); +#endif rtnl_lock(); @@ -1113,7 +1094,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_warn(local->hw.wiphy, "skb_queue not empty\n"); skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); - skb_queue_purge(&local->rx_skb_queue); destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); @@ -1191,8 +1171,7 @@ static void __exit ieee80211_exit(void) rc80211_minstrel_ht_exit(); rc80211_minstrel_exit(); - if (mesh_allocated) - ieee80211s_stop(); + ieee80211s_stop(); ieee80211_iface_exit(); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 1bf03f9..4749b38 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -17,19 +17,14 @@ #define TMR_RUNNING_MP 1 #define TMR_RUNNING_MPR 2 -int mesh_allocated; +static int mesh_allocated; static struct kmem_cache *rm_cache; -#ifdef CONFIG_MAC80211_MESH bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) { return (mgmt->u.action.u.mesh_action.action_code == WLAN_MESH_ACTION_HWMP_PATH_SELECTION); } -#else -bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) -{ return false; } -#endif void ieee80211s_init(void) { @@ -41,6 +36,8 @@ void ieee80211s_init(void) void ieee80211s_stop(void) { + if (!mesh_allocated) + return; mesh_pathtbl_unregister(); kmem_cache_destroy(rm_cache); } @@ -95,24 +92,22 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, (ifmsh->mesh_cc_id == ie->mesh_config->meshconf_congest) && (ifmsh->mesh_sp_id == ie->mesh_config->meshconf_synch) && (ifmsh->mesh_auth_id == 
ie->mesh_config->meshconf_auth))) - goto mismatch; + return false; ieee80211_sta_get_rates(local, ie, ieee80211_get_sdata_band(sdata), &basic_rates); if (sdata->vif.bss_conf.basic_rates != basic_rates) - goto mismatch; + return false; ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, ie->ht_operation, &sta_chan_def); if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef, &sta_chan_def)) - goto mismatch; + return false; return true; -mismatch: - return false; } /** @@ -123,7 +118,7 @@ mismatch: bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { return (ie->mesh_config->meshconf_cap & - IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; + IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; } /** @@ -154,6 +149,31 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) return changed; } +/* + * mesh_sta_cleanup - clean up any mesh sta state + * + * @sta: mesh sta to clean up. + */ +void mesh_sta_cleanup(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 changed; + + /* + * maybe userspace handles peer allocation and peering, but in either + * case the beacon is still generated by the kernel and we might need + * an update. 
+ */ + changed = mesh_accept_plinks_update(sdata); + if (sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { + changed |= mesh_plink_deactivate(sta); + del_timer_sync(&sta->plink_timer); + } + + if (changed) + ieee80211_mbss_info_change_notify(sdata, changed); +} + int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) { int i; @@ -163,7 +183,7 @@ int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) return -ENOMEM; sdata->u.mesh.rmc->idx_mask = RMC_BUCKETS - 1; for (i = 0; i < RMC_BUCKETS; i++) - INIT_LIST_HEAD(&sdata->u.mesh.rmc->bucket[i].list); + INIT_LIST_HEAD(&sdata->u.mesh.rmc->bucket[i]); return 0; } @@ -176,11 +196,12 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) if (!sdata->u.mesh.rmc) return; - for (i = 0; i < RMC_BUCKETS; i++) - list_for_each_entry_safe(p, n, &rmc->bucket[i].list, list) { + for (i = 0; i < RMC_BUCKETS; i++) { + list_for_each_entry_safe(p, n, &rmc->bucket[i], list) { list_del(&p->list); kmem_cache_free(rm_cache, p); } + } kfree(rmc); sdata->u.mesh.rmc = NULL; @@ -189,6 +210,7 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) /** * mesh_rmc_check - Check frame in recent multicast cache and add if absent. * + * @sdata: interface * @sa: source address * @mesh_hdr: mesh_header * @@ -198,8 +220,8 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) * received this frame lately. If the frame is not in the cache, it is added to * it. 
*/ -int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, - struct ieee80211_sub_if_data *sdata) +int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, + const u8 *sa, struct ieee80211s_hdr *mesh_hdr) { struct mesh_rmc *rmc = sdata->u.mesh.rmc; u32 seqnum = 0; @@ -210,15 +232,14 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, /* Don't care about endianness since only match matters */ memcpy(&seqnum, &mesh_hdr->seqnum, sizeof(mesh_hdr->seqnum)); idx = le32_to_cpu(mesh_hdr->seqnum) & rmc->idx_mask; - list_for_each_entry_safe(p, n, &rmc->bucket[idx].list, list) { + list_for_each_entry_safe(p, n, &rmc->bucket[idx], list) { ++entries; if (time_after(jiffies, p->exp_time) || - (entries == RMC_QUEUE_MAX_LEN)) { + entries == RMC_QUEUE_MAX_LEN) { list_del(&p->list); kmem_cache_free(rm_cache, p); --entries; - } else if ((seqnum == p->seqnum) && - (ether_addr_equal(sa, p->sa))) + } else if ((seqnum == p->seqnum) && ether_addr_equal(sa, p->sa)) return -1; } @@ -229,12 +250,12 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, p->seqnum = seqnum; p->exp_time = jiffies + RMC_TIMEOUT; memcpy(p->sa, sa, ETH_ALEN); - list_add(&p->list, &rmc->bucket[idx].list); + list_add(&p->list, &rmc->bucket[idx]); return 0; } -int -mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos, neighbors; @@ -265,16 +286,18 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) /* Mesh capability */ *pos = IEEE80211_MESHCONF_CAPAB_FORWARDING; *pos |= ifmsh->accepting_plinks ? - IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; + IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; + /* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */ + *pos |= ifmsh->ps_peers_deep_sleep ? + IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00; *pos++ |= ifmsh->adjusting_tbtt ? 
- IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00; + IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00; *pos++ = 0x00; return 0; } -int -mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos; @@ -291,8 +314,31 @@ mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } -int -mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +static int mesh_add_awake_window_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 *pos; + + /* see IEEE802.11-2012 13.14.6 */ + if (ifmsh->ps_peers_light_sleep == 0 && + ifmsh->ps_peers_deep_sleep == 0 && + ifmsh->nonpeer_pm == NL80211_MESH_POWER_ACTIVE) + return 0; + + if (skb_tailroom(skb) < 4) + return -ENOMEM; + + pos = skb_put(skb, 2 + 2); + *pos++ = WLAN_EID_MESH_AWAKE_WINDOW; + *pos++ = 2; + put_unaligned_le16(ifmsh->mshcfg.dot11MeshAwakeWindowDuration, pos); + + return 0; +} + +int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 offset, len; @@ -315,8 +361,7 @@ mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } -int -mesh_add_rsn_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 len = 0; @@ -344,11 +389,9 @@ mesh_add_rsn_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } -int mesh_add_ds_params_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +static int mesh_add_ds_params_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; struct 
ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; u8 *pos; @@ -365,19 +408,16 @@ int mesh_add_ds_params_ie(struct sk_buff *skb, chan = chanctx_conf->def.chan; rcu_read_unlock(); - sband = local->hw.wiphy->bands[chan->band]; - if (sband->band == IEEE80211_BAND_2GHZ) { - pos = skb_put(skb, 2 + 1); - *pos++ = WLAN_EID_DS_PARAMS; - *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel(chan->center_freq); - } + pos = skb_put(skb, 2 + 1); + *pos++ = WLAN_EID_DS_PARAMS; + *pos++ = 1; + *pos++ = ieee80211_frequency_to_channel(chan->center_freq); return 0; } -int mesh_add_ht_cap_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); @@ -398,8 +438,8 @@ int mesh_add_ht_cap_ie(struct sk_buff *skb, return 0; } -int mesh_add_ht_oper_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; @@ -434,6 +474,7 @@ int mesh_add_ht_oper_ie(struct sk_buff *skb, return 0; } + static void ieee80211_mesh_path_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = @@ -479,7 +520,7 @@ void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) /** * ieee80211_fill_mesh_addresses - fill addresses of a locally originated mesh frame - * @hdr: 802.11 frame header + * @hdr: 802.11 frame header * @fc: frame control field * @meshda: destination address in the mesh * @meshsa: source address address in the mesh. 
Same as TA, as frame is @@ -510,8 +551,8 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, /** * ieee80211_new_mesh_header - create a new mesh header - * @meshhdr: uninitialized mesh header * @sdata: mesh interface to be used + * @meshhdr: uninitialized mesh header * @addr4or5: 1st address in the ae header, which may correspond to address 4 * (if addr6 is NULL) or address 5 (if addr6 is present). It may * be NULL. @@ -520,42 +561,49 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, * * Return the header length. */ -int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, - struct ieee80211_sub_if_data *sdata, char *addr4or5, - char *addr6) +int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, + struct ieee80211s_hdr *meshhdr, + const char *addr4or5, const char *addr6) { - int aelen = 0; - BUG_ON(!addr4or5 && addr6); + if (WARN_ON(!addr4or5 && addr6)) + return 0; + memset(meshhdr, 0, sizeof(*meshhdr)); + meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + + /* FIXME: racy -- TX on multiple queues can be concurrent */ put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); sdata->u.mesh.mesh_seqnum++; + if (addr4or5 && !addr6) { meshhdr->flags |= MESH_FLAGS_AE_A4; - aelen += ETH_ALEN; memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); + return 2 * ETH_ALEN; } else if (addr4or5 && addr6) { meshhdr->flags |= MESH_FLAGS_AE_A5_A6; - aelen += 2 * ETH_ALEN; memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); memcpy(meshhdr->eaddr2, addr6, ETH_ALEN); + return 3 * ETH_ALEN; } - return 6 + aelen; + + return ETH_ALEN; } -static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_mesh *ifmsh) +static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 changed; ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); mesh_path_expire(sdata); changed = 
mesh_accept_plinks_update(sdata); - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); mod_timer(&ifmsh->housekeeping_timer, - round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); + round_jiffies(jiffies + + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); } static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata) @@ -603,10 +651,149 @@ void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) } #endif -void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) +static int +ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) +{ + struct beacon_data *bcn; + int head_len, tail_len; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + struct ieee80211_chanctx_conf *chanctx_conf; + enum ieee80211_band band; + u8 *pos; + struct ieee80211_sub_if_data *sdata; + int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + + sizeof(mgmt->u.beacon); + + sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + band = chanctx_conf->def.chan->band; + rcu_read_unlock(); + + head_len = hdr_len + + 2 + /* NULL SSID */ + 2 + 8 + /* supported rates */ + 2 + 3; /* DS params */ + tail_len = 2 + (IEEE80211_MAX_SUPP_RATES - 8) + + 2 + sizeof(struct ieee80211_ht_cap) + + 2 + sizeof(struct ieee80211_ht_operation) + + 2 + ifmsh->mesh_id_len + + 2 + sizeof(struct ieee80211_meshconf_ie) + + 2 + sizeof(__le16) + /* awake window */ + ifmsh->ie_len; + + bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); + /* need an skb for IE builders to operate on */ + skb = dev_alloc_skb(max(head_len, tail_len)); + + if (!bcn || !skb) + goto out_free; + + /* + * pointers go into the block we allocated, + * memory is | beacon_data | head | tail | + */ + bcn->head = ((u8 *) bcn) + sizeof(*bcn); + + /* fill in the head */ + mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); + memset(mgmt, 0, hdr_len); + 
mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_BEACON); + eth_broadcast_addr(mgmt->da); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sdata, NULL, (void *) mgmt); + mgmt->u.beacon.beacon_int = + cpu_to_le16(sdata->vif.bss_conf.beacon_int); + mgmt->u.beacon.capab_info |= cpu_to_le16( + sdata->u.mesh.security ? WLAN_CAPABILITY_PRIVACY : 0); + + pos = skb_put(skb, 2); + *pos++ = WLAN_EID_SSID; + *pos++ = 0x0; + + if (ieee80211_add_srates_ie(sdata, skb, true, band) || + mesh_add_ds_params_ie(sdata, skb)) + goto out_free; + + bcn->head_len = skb->len; + memcpy(bcn->head, skb->data, bcn->head_len); + + /* now the tail */ + skb_trim(skb, 0); + bcn->tail = bcn->head + bcn->head_len; + + if (ieee80211_add_ext_srates_ie(sdata, skb, true, band) || + mesh_add_rsn_ie(sdata, skb) || + mesh_add_ht_cap_ie(sdata, skb) || + mesh_add_ht_oper_ie(sdata, skb) || + mesh_add_meshid_ie(sdata, skb) || + mesh_add_meshconf_ie(sdata, skb) || + mesh_add_awake_window_ie(sdata, skb) || + mesh_add_vendor_ies(sdata, skb)) + goto out_free; + + bcn->tail_len = skb->len; + memcpy(bcn->tail, skb->data, bcn->tail_len); + + dev_kfree_skb(skb); + rcu_assign_pointer(ifmsh->beacon, bcn); + return 0; +out_free: + kfree(bcn); + dev_kfree_skb(skb); + return -ENOMEM; +} + +static int +ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) +{ + struct ieee80211_sub_if_data *sdata; + struct beacon_data *old_bcn; + int ret; + sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); + + mutex_lock(&ifmsh->mtx); + + old_bcn = rcu_dereference_protected(ifmsh->beacon, + lockdep_is_held(&ifmsh->mtx)); + ret = ieee80211_mesh_build_beacon(ifmsh); + if (ret) + /* just reuse old beacon */ + goto out; + + if (old_bcn) + kfree_rcu(old_bcn, rcu_head); +out: + mutex_unlock(&ifmsh->mtx); + return ret; +} + +void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, + u32 changed) +{ + 
if (sdata->vif.bss_conf.enable_beacon && + (changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT))) + if (ieee80211_mesh_rebuild_beacon(&sdata->u.mesh)) + return; + ieee80211_bss_info_change_notify(sdata, changed); +} + +int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; + u32 changed = BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT; + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -624,34 +811,51 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_queue_work(&local->hw, &sdata->work); sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; - sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.basic_rates = - ieee80211_mandatory_rates(sdata->local, - ieee80211_get_sdata_band(sdata)); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED | - BSS_CHANGED_HT | - BSS_CHANGED_BASIC_RATES | - BSS_CHANGED_BEACON_INT); + ieee80211_mandatory_rates(local, band); + + changed |= ieee80211_mps_local_status_update(sdata); + + if (ieee80211_mesh_build_beacon(ifmsh)) { + ieee80211_stop_mesh(sdata); + return -ENOMEM; + } + + ieee80211_bss_info_change_notify(sdata, changed); netif_carrier_on(sdata->dev); + return 0; } void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct beacon_data *bcn; netif_carrier_off(sdata->dev); /* stop the beacon */ ifmsh->mesh_id_len = 0; + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); 
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + mutex_lock(&ifmsh->mtx); + bcn = rcu_dereference_protected(ifmsh->beacon, + lockdep_is_held(&ifmsh->mtx)); + rcu_assign_pointer(ifmsh->beacon, NULL); + kfree_rcu(bcn, rcu_head); + mutex_unlock(&ifmsh->mtx); /* flush STAs and mpaths on this iface */ - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); mesh_path_flush_by_iface(sdata); + /* free all potentially still buffered group-addressed frames */ + local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf); + skb_queue_purge(&ifmsh->ps.bc_buf); + del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); @@ -671,6 +875,62 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) sdata->u.mesh.timers_running = 0; } +static void +ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct sk_buff *presp; + struct beacon_data *bcn; + struct ieee80211_mgmt *hdr; + struct ieee802_11_elems elems; + size_t baselen; + u8 *pos, *end; + + end = ((u8 *) mgmt) + len; + pos = mgmt->u.probe_req.variable; + baselen = (u8 *) pos - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(pos, len - baselen, &elems); + + /* 802.11-2012 10.1.4.3.2 */ + if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && + !is_broadcast_ether_addr(mgmt->da)) || + elems.ssid_len != 0) + return; + + if (elems.mesh_id_len != 0 && + (elems.mesh_id_len != ifmsh->mesh_id_len || + memcmp(elems.mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len))) + return; + + rcu_read_lock(); + bcn = rcu_dereference(ifmsh->beacon); + + if (!bcn) + goto out; + + presp = dev_alloc_skb(local->tx_headroom + + bcn->head_len + bcn->tail_len); + if (!presp) + goto out; + + skb_reserve(presp, local->tx_headroom); + 
memcpy(skb_put(presp, bcn->head_len), bcn->head, bcn->head_len); + memcpy(skb_put(presp, bcn->tail_len), bcn->tail, bcn->tail_len); + hdr = (struct ieee80211_mgmt *) presp->data; + hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_RESP); + memcpy(hdr->da, mgmt->sa, ETH_ALEN); + IEEE80211_SKB_CB(presp)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, presp); +out: + rcu_read_unlock(); +} + static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, struct ieee80211_mgmt *mgmt, @@ -760,6 +1020,9 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_bcn_presp(sdata, stype, mgmt, skb->len, rx_status); break; + case IEEE80211_STYPE_PROBE_REQ: + ieee80211_mesh_rx_probe_req(sdata, mgmt, skb->len); + break; case IEEE80211_STYPE_ACTION: ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; @@ -782,7 +1045,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) mesh_mpp_table_grow(); if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) - ieee80211_mesh_housekeeping(sdata, ifmsh); + ieee80211_mesh_housekeeping(sdata); if (test_and_clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags)) ieee80211_mesh_rootpath(sdata); @@ -797,7 +1060,8 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) - if (ieee80211_vif_is_mesh(&sdata->vif)) + if (ieee80211_vif_is_mesh(&sdata->vif) && + ieee80211_sdata_running(sdata)) ieee80211_queue_work(&local->hw, &sdata->work); rcu_read_unlock(); } @@ -805,6 +1069,7 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + static u8 zero_addr[ETH_ALEN] = {}; setup_timer(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, @@ -828,6 +1093,11 @@ void 
ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_mesh_path_root_timer, (unsigned long) sdata); INIT_LIST_HEAD(&ifmsh->preq_queue.list); + skb_queue_head_init(&ifmsh->ps.bc_buf); spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); + RCU_INIT_POINTER(ifmsh->beacon, NULL); + mutex_init(&ifmsh->mtx); + + sdata->vif.bss_conf.bssid = zero_addr; } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 7c9215f..336c88a 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -26,12 +26,12 @@ * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path * @MESH_PATH_SN_VALID: the mesh path contains a valid destination sequence - * number + * number * @MESH_PATH_FIXED: the mesh path has been manually set and should not be - * modified + * modified * @MESH_PATH_RESOLVED: the mesh path can has been resolved * @MESH_PATH_REQ_QUEUED: there is an unsent path request for this destination - * already queued up, waiting for the discovery process to start. + * already queued up, waiting for the discovery process to start. * * MESH_PATH_RESOLVED is used by the mesh path timer to * decide when to stop or cancel the mesh path discovery. 
@@ -73,16 +73,16 @@ enum mesh_deferred_task_flags { * @dst: mesh path destination mac address * @sdata: mesh subif * @next_hop: mesh neighbor to which frames for this destination will be - * forwarded + * forwarded * @timer: mesh path discovery timer * @frame_queue: pending queue for frames sent to this destination while the - * path is unresolved + * path is unresolved * @sn: target sequence number * @metric: current metric to this destination * @hop_count: hops to destination * @exp_time: in jiffies, when the path will expire or when it expired * @discovery_timeout: timeout (lapse in jiffies) used for the last discovery - * retry + * retry * @discovery_retries: number of discovery retries * @flags: mesh path flags, as specified on &enum mesh_path_flags * @state_lock: mesh path state lock used to protect changes to the @@ -184,15 +184,13 @@ struct rmc_entry { }; struct mesh_rmc { - struct rmc_entry bucket[RMC_BUCKETS]; + struct list_head bucket[RMC_BUCKETS]; u32 idx_mask; }; #define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) -#define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units */ - #define MESH_PATH_EXPIRE (600 * HZ) /* Default maximum number of plinks per interface */ @@ -208,95 +206,113 @@ struct mesh_rmc { /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, const u8 *da, const u8 *sa); -int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, - struct ieee80211_sub_if_data *sdata, char *addr4or5, - char *addr6); -int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr, - struct ieee80211_sub_if_data *sdata); +int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, + struct ieee80211s_hdr *meshhdr, + const char *addr4or5, const char *addr6); +int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, + const u8 *addr, struct ieee80211s_hdr *mesh_hdr); bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *ie); 
void mesh_ids_set_default(struct ieee80211_if_mesh *mesh); -void mesh_mgmt_ies_add(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_meshconf_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_meshid_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_rsn_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_vendor_ies(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_ds_params_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_ht_cap_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_ht_oper_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); +void mesh_mgmt_ies_add(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb); -void ieee80211s_stop(void); + struct sta_info *sta, struct sk_buff *skb); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); -void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); +int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); const 
struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method); +/* wrapper for ieee80211_bss_info_change_notify() */ +void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, + u32 changed); + +/* mesh power save */ +u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata); +u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, + enum nl80211_mesh_power_mode pm); +void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_hdr *hdr); +void ieee80211_mps_sta_status_update(struct sta_info *sta); +void ieee80211_mps_rx_h_sta_process(struct sta_info *sta, + struct ieee80211_hdr *hdr); +void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta, + bool tx, bool acked); +void ieee80211_mps_frame_release(struct sta_info *sta, + struct ieee802_11_elems *elems); /* Mesh paths */ -int mesh_nexthop_lookup(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_nexthop_resolve(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); +int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata); -struct mesh_path *mesh_path_lookup(u8 *dst, - struct ieee80211_sub_if_data *sdata); -struct mesh_path *mpp_path_lookup(u8 *dst, - struct ieee80211_sub_if_data *sdata); -int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata); -struct mesh_path *mesh_path_lookup_by_idx(int idx, - struct ieee80211_sub_if_data *sdata); +struct mesh_path *mesh_path_lookup(struct ieee80211_sub_if_data *sdata, + const u8 *dst); +struct mesh_path *mpp_path_lookup(struct ieee80211_sub_if_data *sdata, + const u8 *dst); +int mpp_path_add(struct ieee80211_sub_if_data *sdata, + const u8 *dst, const u8 *mpp); +struct mesh_path * +mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int 
idx); void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); void mesh_path_expire(struct ieee80211_sub_if_data *sdata); void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len); -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); + struct ieee80211_mgmt *mgmt, size_t len); +int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst); int mesh_path_add_gate(struct mesh_path *mpath); int mesh_path_send_to_gates(struct mesh_path *mpath); int mesh_gate_num(struct ieee80211_sub_if_data *sdata); + /* Mesh plinks */ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, - u8 *hw_addr, - struct ieee802_11_elems *ie); + u8 *hw_addr, struct ieee802_11_elems *ie); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_broken(struct sta_info *sta); -void mesh_plink_deactivate(struct sta_info *sta); -int mesh_plink_open(struct sta_info *sta); -void mesh_plink_block(struct sta_info *sta); +u32 mesh_plink_deactivate(struct sta_info *sta); +u32 mesh_plink_open(struct sta_info *sta); +u32 mesh_plink_block(struct sta_info *sta); void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status); +void mesh_sta_cleanup(struct sta_info *sta); /* Private interfaces */ /* Mesh tables */ void mesh_mpath_table_grow(void); void mesh_mpp_table_grow(void); /* Mesh paths */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, - const u8 *ra, struct ieee80211_sub_if_data *sdata); +int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, + u8 ttl, const u8 *target, __le32 target_sn, + __le16 target_rcode, const u8 *ra); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path 
*mpath); int mesh_pathtbl_init(void); void mesh_pathtbl_unregister(void); -int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata); +int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); -void mesh_path_discard_frame(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); +void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); void mesh_path_restart(struct ieee80211_sub_if_data *sdata); void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); @@ -305,7 +321,19 @@ bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); extern int mesh_paths_generation; #ifdef CONFIG_MAC80211_MESH -extern int mesh_allocated; +static inline +u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) +{ + atomic_inc(&sdata->u.mesh.estab_plinks); + return mesh_accept_plinks_update(sdata); +} + +static inline +u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) +{ + atomic_dec(&sdata->u.mesh.estab_plinks); + return mesh_accept_plinks_update(sdata); +} static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata) { @@ -337,8 +365,8 @@ void mesh_plink_quiesce(struct sta_info *sta); void mesh_plink_restart(struct sta_info *sta); void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); +void ieee80211s_stop(void); #else -#define mesh_allocated 0 static inline void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} static inline void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) @@ -351,6 +379,7 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) { return false; } static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) {} +static inline void 
ieee80211s_stop(void) {} #endif #endif /* IEEE80211S_H */ diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 47aeee2..bdb8d3b 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -30,14 +30,14 @@ static void mesh_queue_preq(struct mesh_path *, u8); -static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; return get_unaligned_le32(preq_elem + offset); } -static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; @@ -102,10 +102,13 @@ enum mpath_frame_type { static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, - u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target, - __le32 target_sn, const u8 *da, u8 hop_count, u8 ttl, - __le32 lifetime, __le32 metric, __le32 preq_id, - struct ieee80211_sub_if_data *sdata) + const u8 *orig_addr, __le32 orig_sn, + u8 target_flags, const u8 *target, + __le32 target_sn, const u8 *da, + u8 hop_count, u8 ttl, + __le32 lifetime, __le32 metric, + __le32 preq_id, + struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -205,6 +208,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; skb_set_mac_header(skb, 0); skb_set_network_header(skb, 0); @@ -215,24 +219,28 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, skb->priority = 7; info->control.vif = &sdata->vif; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; ieee80211_set_qos_hdr(sdata, skb); + ieee80211_mps_set_frame_flags(sdata, NULL, hdr); } /** - * mesh_send_path 
error - Sends a PERR mesh management frame + * mesh_path_error_tx - Sends a PERR mesh management frame * + * @ttl: allowed remaining hops * @target: broken destination * @target_sn: SN of the broken destination * @target_rcode: reason code for this PERR * @ra: node this frame is addressed to + * @sdata: local mesh subif * * Note: This function may be called with driver locks taken that the driver * also acquires in the TX path. To avoid a deadlock we don't transmit the * frame directly but add it to the pending queue instead. */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, - __le16 target_rcode, const u8 *ra, - struct ieee80211_sub_if_data *sdata) +int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, + u8 ttl, const u8 *target, __le32 target_sn, + __le16 target_rcode, const u8 *ra) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -246,11 +254,13 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, return -EAGAIN; skb = dev_alloc_skb(local->tx_headroom + + IEEE80211_ENCRYPT_HEADROOM + + IEEE80211_ENCRYPT_TAILROOM + hdr_len + 2 + 15 /* PERR IE */); if (!skb) return -1; - skb_reserve(skb, local->tx_headroom); + skb_reserve(skb, local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | @@ -350,6 +360,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * @sdata: local mesh subif * @mgmt: mesh management frame * @hwmp_ie: hwmp information element (PREP or PREQ) + * @action: type of hwmp ie * * This function updates the path routing information to the originator and the * transmitter of a HWMP PREQ or PREP frame. @@ -361,14 +372,14 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * path routing information is updated. 
*/ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - u8 *hwmp_ie, enum mpath_frame_type action) + struct ieee80211_mgmt *mgmt, + const u8 *hwmp_ie, enum mpath_frame_type action) { struct ieee80211_local *local = sdata->local; struct mesh_path *mpath; struct sta_info *sta; bool fresh_info; - u8 *orig_addr, *ta; + const u8 *orig_addr, *ta; u32 orig_sn, orig_metric; unsigned long orig_lifetime, exp_time; u32 last_hop_metric, new_metric; @@ -419,7 +430,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, process = false; fresh_info = false; } else { - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_FIXED) @@ -434,8 +445,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, } } } else { - mesh_path_add(orig_addr, sdata); - mpath = mesh_path_lookup(orig_addr, sdata); + mesh_path_add(sdata, orig_addr); + mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { rcu_read_unlock(); return 0; @@ -467,7 +478,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, else { fresh_info = true; - mpath = mesh_path_lookup(ta, sdata); + mpath = mesh_path_lookup(sdata, ta); if (mpath) { spin_lock_bh(&mpath->state_lock); if ((mpath->flags & MESH_PATH_FIXED) || @@ -475,8 +486,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, (last_hop_metric > mpath->metric))) fresh_info = false; } else { - mesh_path_add(ta, sdata); - mpath = mesh_path_lookup(ta, sdata); + mesh_path_add(sdata, ta); + mpath = mesh_path_lookup(sdata, ta); if (!mpath) { rcu_read_unlock(); return 0; @@ -503,11 +514,11 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *preq_elem, u32 metric) + const u8 *preq_elem, u32 metric) { struct ieee80211_if_mesh 
*ifmsh = &sdata->u.mesh; struct mesh_path *mpath = NULL; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; const u8 *da; u8 target_flags, ttl, flags; u32 orig_sn, target_sn, lifetime, orig_metric; @@ -542,7 +553,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, } else if (is_broadcast_ether_addr(target_addr) && (target_flags & IEEE80211_PREQ_TO_FLAG)) { rcu_read_lock(); - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (mpath) { if (flags & IEEE80211_PREQ_PROACTIVE_PREP_FLAG) { reply = true; @@ -557,7 +568,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); } else { rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (mpath) { if ((!(mpath->flags & MESH_PATH_SN_VALID)) || SN_LT(mpath->sn, target_sn)) { @@ -640,11 +651,11 @@ next_hop_deref_protected(struct mesh_path *mpath) static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *prep_elem, u32 metric) + const u8 *prep_elem, u32 metric) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; u8 ttl, hopcount, flags; u8 next_hop[ETH_ALEN]; u32 target_sn, orig_sn, lifetime; @@ -667,7 +678,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, } rcu_read_lock(); - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (mpath) spin_lock_bh(&mpath->state_lock); else @@ -703,12 +714,13 @@ fail: } static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, u8 *perr_elem) + struct ieee80211_mgmt *mgmt, + const u8 *perr_elem) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; u8 ttl; - u8 *ta, *target_addr; + const u8 *ta, *target_addr; u32 target_sn; u16 
target_rcode; @@ -724,7 +736,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, target_rcode = PERR_IE_TARGET_RCODE(perr_elem); rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (mpath) { struct sta_info *sta; @@ -739,9 +751,10 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, spin_unlock_bh(&mpath->state_lock); if (!ifmsh->mshcfg.dot11MeshForwarding) goto endperr; - mesh_path_error_tx(ttl, target_addr, cpu_to_le32(target_sn), + mesh_path_error_tx(sdata, ttl, target_addr, + cpu_to_le32(target_sn), cpu_to_le16(target_rcode), - broadcast_addr, sdata); + broadcast_addr); } else spin_unlock_bh(&mpath->state_lock); } @@ -750,15 +763,15 @@ endperr: } static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - struct ieee80211_rann_ie *rann) + struct ieee80211_mgmt *mgmt, + const struct ieee80211_rann_ie *rann) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct mesh_path *mpath; u8 ttl, flags, hopcount; - u8 *orig_addr; + const u8 *orig_addr; u32 orig_sn, metric, metric_txsta, interval; bool root_is_gate; @@ -789,10 +802,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, metric_txsta = airtime_link_metric_get(local, sta); - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { - mesh_path_add(orig_addr, sdata); - mpath = mesh_path_lookup(orig_addr, sdata); + mesh_path_add(sdata, orig_addr); + mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { rcu_read_unlock(); sdata->u.mesh.mshstats.dropped_frames_no_route++; @@ -849,8 +862,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len) + struct ieee80211_mgmt *mgmt, size_t len) 
{ struct ieee802_11_elems elems; size_t baselen; @@ -994,7 +1006,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); rcu_read_lock(); - mpath = mesh_path_lookup(preq_node->dst, sdata); + mpath = mesh_path_lookup(sdata, preq_node->dst); if (!mpath) goto enddiscovery; @@ -1064,8 +1076,8 @@ enddiscovery: * Returns: 0 if the next hop was found and -ENOENT if the frame was queued. * skb is freeed here if no mpath could be allocated. */ -int mesh_nexthop_resolve(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -1074,18 +1086,22 @@ int mesh_nexthop_resolve(struct sk_buff *skb, u8 *target_addr = hdr->addr3; int err = 0; + /* Nulls are only sent to peers for PS and should be pre-addressed */ + if (ieee80211_is_qos_nullfunc(hdr->frame_control)) + return 0; + rcu_read_lock(); - err = mesh_nexthop_lookup(skb, sdata); + err = mesh_nexthop_lookup(sdata, skb); if (!err) goto endlookup; /* no nexthop found, start resolving */ - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (!mpath) { - mesh_path_add(target_addr, sdata); - mpath = mesh_path_lookup(target_addr, sdata); + mesh_path_add(sdata, target_addr); + mpath = mesh_path_lookup(sdata, target_addr); if (!mpath) { - mesh_path_discard_frame(skb, sdata); + mesh_path_discard_frame(sdata, skb); err = -ENOSPC; goto endlookup; } @@ -1102,12 +1118,13 @@ int mesh_nexthop_resolve(struct sk_buff *skb, skb_queue_tail(&mpath->frame_queue, skb); err = -ENOENT; if (skb_to_free) - mesh_path_discard_frame(skb_to_free, sdata); + mesh_path_discard_frame(sdata, skb_to_free); endlookup: rcu_read_unlock(); return err; } + /** * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame. 
Calling * this function is considered "using" the associated mpath, so preempt a path @@ -1118,8 +1135,8 @@ endlookup: * * Returns: 0 if the next hop was found. Nonzero otherwise. */ -int mesh_nexthop_lookup(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct mesh_path *mpath; struct sta_info *next_hop; @@ -1128,7 +1145,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, int err = -ENOENT; rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE)) goto endlookup; @@ -1145,6 +1162,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, if (next_hop) { memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sdata, next_hop, hdr); err = 0; } @@ -1186,8 +1204,7 @@ void mesh_path_timer(unsigned long data) } } -void -mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) +void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 interval = ifmsh->mshcfg.dot11MeshHWMPRannInterval; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index aa74981..dc7c8df 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -24,9 +24,12 @@ /* Keep the mean chain length below this constant */ #define MEAN_CHAIN_LEN 2 -#define MPATH_EXPIRED(mpath) ((mpath->flags & MESH_PATH_ACTIVE) && \ - time_after(jiffies, mpath->exp_time) && \ - !(mpath->flags & MESH_PATH_FIXED)) +static inline bool mpath_expired(struct mesh_path *mpath) +{ + return (mpath->flags & MESH_PATH_ACTIVE) && + time_after(jiffies, mpath->exp_time) && + !(mpath->flags & MESH_PATH_FIXED); +} struct mpath_node { struct hlist_node list; @@ -69,9 +72,9 @@ static inline struct mesh_table *resize_dereference_mpp_paths(void) * it's used twice. 
So it is illegal to do * for_each_mesh_entry(rcu_dereference(...), ...) */ -#define for_each_mesh_entry(tbl, p, node, i) \ +#define for_each_mesh_entry(tbl, node, i) \ for (i = 0; i <= tbl->hash_mask; i++) \ - hlist_for_each_entry_rcu(node, p, &tbl->hash_buckets[i], list) + hlist_for_each_entry_rcu(node, &tbl->hash_buckets[i], list) static struct mesh_table *mesh_table_alloc(int size_order) @@ -136,7 +139,7 @@ static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) } if (free_leafs) { spin_lock_bh(&tbl->gates_lock); - hlist_for_each_entry_safe(gate, p, q, + hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { hlist_del(&gate->list); kfree(gate); @@ -181,12 +184,12 @@ errcopy: return -ENOMEM; } -static u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, +static u32 mesh_table_hash(const u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { /* Use last four bytes of hw addr and interface index as hash index */ - return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, tbl->hash_rnd) - & tbl->hash_mask; + return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, + tbl->hash_rnd) & tbl->hash_mask; } @@ -212,6 +215,7 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sta->sdata, sta, hdr); } spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); @@ -325,20 +329,19 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, } -static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, - struct ieee80211_sub_if_data *sdata) +static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, + struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; - struct hlist_node *n; struct hlist_head *bucket; struct mpath_node *node; bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, 
tbl)]; - hlist_for_each_entry_rcu(node, n, bucket, list) { + hlist_for_each_entry_rcu(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) { - if (MPATH_EXPIRED(mpath)) { + if (mpath_expired(mpath)) { spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&mpath->state_lock); @@ -351,19 +354,21 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, /** * mesh_path_lookup - look up a path in the mesh path table - * @dst: hardware address (ETH_ALEN length) of destination * @sdata: local subif + * @dst: hardware address (ETH_ALEN length) of destination * * Returns: pointer to the mesh path structure, or NULL if not found * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +struct mesh_path * +mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(rcu_dereference(mesh_paths), dst, sdata); } -struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +struct mesh_path * +mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(rcu_dereference(mpp_paths), dst, sdata); } @@ -378,19 +383,19 @@ struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) * * Locking: must be called within a read rcu section. 
*/ -struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data *sdata) +struct mesh_path * +mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { struct mesh_table *tbl = rcu_dereference(mesh_paths); struct mpath_node *node; - struct hlist_node *p; int i; int j = 0; - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { if (sdata && node->mpath->sdata != sdata) continue; if (j++ == idx) { - if (MPATH_EXPIRED(node->mpath)) { + if (mpath_expired(node->mpath)) { spin_lock_bh(&node->mpath->state_lock); node->mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&node->mpath->state_lock); @@ -410,13 +415,12 @@ int mesh_path_add_gate(struct mesh_path *mpath) { struct mesh_table *tbl; struct mpath_node *gate, *new_gate; - struct hlist_node *n; int err; rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - hlist_for_each_entry_rcu(gate, n, tbl->known_gates, list) + hlist_for_each_entry_rcu(gate, tbl->known_gates, list) if (gate->mpath == mpath) { err = -EEXIST; goto err_rcu; @@ -434,11 +438,10 @@ int mesh_path_add_gate(struct mesh_path *mpath) spin_lock_bh(&tbl->gates_lock); hlist_add_head_rcu(&new_gate->list, tbl->known_gates); spin_unlock_bh(&tbl->gates_lock); - rcu_read_unlock(); mpath_dbg(mpath->sdata, "Mesh path: Recorded new gate: %pM. 
%d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); - return 0; + err = 0; err_rcu: rcu_read_unlock(); return err; @@ -449,30 +452,27 @@ err_rcu: * @tbl: table which holds our list of known gates * @mpath: gate mpath * - * Returns: 0 on success - * * Locking: must be called inside rcu_read_lock() section */ -static int mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) +static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) { struct mpath_node *gate; - struct hlist_node *p, *q; + struct hlist_node *q; - hlist_for_each_entry_safe(gate, p, q, tbl->known_gates, list) - if (gate->mpath == mpath) { - spin_lock_bh(&tbl->gates_lock); - hlist_del_rcu(&gate->list); - kfree_rcu(gate, rcu); - spin_unlock_bh(&tbl->gates_lock); - mpath->sdata->u.mesh.num_gates--; - mpath->is_gate = false; - mpath_dbg(mpath->sdata, - "Mesh path: Deleted gate: %pM. %d known gates\n", - mpath->dst, mpath->sdata->u.mesh.num_gates); - break; - } - - return 0; + hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { + if (gate->mpath != mpath) + continue; + spin_lock_bh(&tbl->gates_lock); + hlist_del_rcu(&gate->list); + kfree_rcu(gate, rcu); + spin_unlock_bh(&tbl->gates_lock); + mpath->sdata->u.mesh.num_gates--; + mpath->is_gate = false; + mpath_dbg(mpath->sdata, + "Mesh path: Deleted gate: %pM. 
%d known gates\n", + mpath->dst, mpath->sdata->u.mesh.num_gates); + break; + } } /** @@ -486,14 +486,14 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) /** * mesh_path_add - allocate and add a new path to the mesh path table - * @addr: destination address of the path (ETH_ALEN length) + * @dst: destination address of the path (ETH_ALEN length) * @sdata: local subif * * Returns: 0 on success * * State: the initial state of the new path is set to 0 */ -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) +int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; @@ -501,7 +501,6 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; - struct hlist_node *n; int grow = 0; int err = 0; u32 hash_idx; @@ -547,7 +546,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) spin_lock(&tbl->hashwlock[hash_idx]); err = -EEXIST; - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) @@ -628,7 +627,8 @@ void mesh_mpp_table_grow(void) write_unlock_bh(&pathtbl_resize_lock); } -int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) +int mpp_path_add(struct ieee80211_sub_if_data *sdata, + const u8 *dst, const u8 *mpp) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; @@ -636,7 +636,6 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; - struct hlist_node *n; int grow = 0; int err = 0; u32 hash_idx; @@ -676,7 +675,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) spin_lock(&tbl->hashwlock[hash_idx]); 
err = -EEXIST; - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) @@ -721,14 +720,13 @@ void mesh_plink_broken(struct sta_info *sta) static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; struct ieee80211_sub_if_data *sdata = sta->sdata; int i; __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_DEST_UNREACHABLE); rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (rcu_dereference(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && @@ -737,9 +735,10 @@ void mesh_plink_broken(struct sta_info *sta) mpath->flags &= ~MESH_PATH_ACTIVE; ++mpath->sn; spin_unlock_bh(&mpath->state_lock); - mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, - mpath->dst, cpu_to_le32(mpath->sn), - reason, bcast, sdata); + mesh_path_error_tx(sdata, + sdata->u.mesh.mshcfg.element_ttl, + mpath->dst, cpu_to_le32(mpath->sn), + reason, bcast); } } rcu_read_unlock(); @@ -787,13 +786,12 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; rcu_read_lock(); read_lock_bh(&pathtbl_resize_lock); tbl = resize_dereference_mesh_paths(); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (rcu_dereference(mpath->next_hop) == sta) { spin_lock(&tbl->hashwlock[i]); @@ -810,11 +808,10 @@ static void table_flush_by_iface(struct mesh_table *tbl, { struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; WARN_ON(!rcu_read_lock_held()); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (mpath->sdata != sdata) continue; @@ -854,13 +851,12 @@ void 
mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) * * Returns: 0 if successful */ -int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) +int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; struct hlist_head *bucket; - struct hlist_node *n; int hash_idx; int err = 0; @@ -870,7 +866,7 @@ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) bucket = &tbl->hash_buckets[hash_idx]; spin_lock(&tbl->hashwlock[hash_idx]); - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(addr, mpath->dst)) { @@ -915,7 +911,6 @@ void mesh_path_tx_pending(struct mesh_path *mpath) int mesh_path_send_to_gates(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; - struct hlist_node *n; struct mesh_table *tbl; struct mesh_path *from_mpath = mpath; struct mpath_node *gate = NULL; @@ -930,7 +925,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) if (!known_gates) return -EHOSTUNREACH; - hlist_for_each_entry_rcu(gate, n, known_gates, list) { + hlist_for_each_entry_rcu(gate, known_gates, list) { if (gate->mpath->sdata != sdata) continue; @@ -946,7 +941,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) } } - hlist_for_each_entry_rcu(gate, n, known_gates, list) + hlist_for_each_entry_rcu(gate, known_gates, list) if (gate->mpath->sdata == sdata) { mpath_dbg(sdata, "Sending to %pM\n", gate->mpath->dst); mesh_path_tx_pending(gate->mpath); @@ -963,8 +958,8 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) * * Locking: the function must me called within a rcu_read_lock region */ -void mesh_path_discard_frame(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { kfree_skb(skb); sdata->u.mesh.mshstats.dropped_frames_no_route++; @@ 
-982,7 +977,7 @@ void mesh_path_flush_pending(struct mesh_path *mpath) struct sk_buff *skb; while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) - mesh_path_discard_frame(skb, mpath->sdata); + mesh_path_discard_frame(mpath->sdata, skb); } /** @@ -1091,19 +1086,18 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { if (node->mpath->sdata != sdata) continue; mpath = node->mpath; if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) - mesh_path_del(mpath->dst, mpath->sdata); + mesh_path_del(mpath->sdata, mpath->dst); } rcu_read_unlock(); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 4b274e9..07d396d 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -37,23 +37,31 @@ enum plink_event { CLS_IGNR }; -static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, - enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason); +static const char * const mplstates[] = { + [NL80211_PLINK_LISTEN] = "LISTEN", + [NL80211_PLINK_OPN_SNT] = "OPN-SNT", + [NL80211_PLINK_OPN_RCVD] = "OPN-RCVD", + [NL80211_PLINK_CNF_RCVD] = "CNF_RCVD", + [NL80211_PLINK_ESTAB] = "ESTAB", + [NL80211_PLINK_HOLDING] = "HOLDING", + [NL80211_PLINK_BLOCKED] = "BLOCKED" +}; -static inline -u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) -{ - atomic_inc(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); -} +static const char * const mplevents[] = { + [PLINK_UNDEFINED] = "NONE", + [OPN_ACPT] = "OPN_ACPT", + [OPN_RJCT] = "OPN_RJCT", + [OPN_IGNR] = "OPN_IGNR", + [CNF_ACPT] = "CNF_ACPT", + [CNF_RJCT] = "CNF_RJCT", + [CNF_IGNR] = "CNF_IGNR", + 
[CLS_ACPT] = "CLS_ACPT", + [CLS_IGNR] = "CLS_IGNR" +}; -static inline -u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) -{ - atomic_dec(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); -} +static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason); /** * mesh_plink_fsm_restart - restart a mesh peer link finite state machine @@ -70,27 +78,63 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta) } /* - * Allocate mesh sta entry and insert into station table + * mesh_set_short_slot_time - enable / disable ERP short slot time. + * + * The standard indirectly mandates mesh STAs to turn off short slot time by + * disallowing advertising this (802.11-2012 8.4.1.4), but that doesn't mean we + * can't be sneaky about it. Enable short slot time if all mesh STAs in the + * MBSS support ERP rates. + * + * Returns BSS_CHANGED_ERP_SLOT or 0 for no change. 
*/ -static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, - u8 *hw_addr) +static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; struct sta_info *sta; + u32 erp_rates = 0, changed = 0; + int i; + bool short_slot = false; - if (sdata->local->num_sta >= MESH_MAX_PLINKS) - return NULL; + if (band == IEEE80211_BAND_5GHZ) { + /* (IEEE 802.11-2012 19.4.5) */ + short_slot = true; + goto out; + } else if (band != IEEE80211_BAND_2GHZ || + (band == IEEE80211_BAND_2GHZ && + local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + goto out; - sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); - if (!sta) - return NULL; + for (i = 0; i < sband->n_bitrates; i++) + if (sband->bitrates[i].flags & IEEE80211_RATE_ERP_G) + erp_rates |= BIT(i); - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); - sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); + if (!erp_rates) + goto out; - set_sta_flag(sta, WLAN_STA_WME); + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata || + sta->plink_state != NL80211_PLINK_ESTAB) + continue; - return sta; + short_slot = false; + if (erp_rates & sta->sta.supp_rates[band]) + short_slot = true; + else + break; + } + rcu_read_unlock(); + +out: + if (sdata->vif.bss_conf.use_short_slot != short_slot) { + sdata->vif.bss_conf.use_short_slot = short_slot; + changed = BSS_CHANGED_ERP_SLOT; + mpl_dbg(sdata, "mesh_plink %pM: ERP short slot time %d\n", + sdata->vif.addr, short_slot); + } + return changed; } /** @@ -107,7 +151,6 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u32 changed = 0; u16 ht_opmode; bool non_ht_sta = false, ht20_sta = 
false; @@ -120,23 +163,19 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) sta->plink_state != NL80211_PLINK_ESTAB) continue; - switch (sta->ch_width) { - case NL80211_CHAN_WIDTH_20_NOHT: - mpl_dbg(sdata, - "mesh_plink %pM: nonHT sta (%pM) is present\n", - sdata->vif.addr, sta->sta.addr); + if (sta->sta.bandwidth > IEEE80211_STA_RX_BW_20) + continue; + + if (!sta->sta.ht_cap.ht_supported) { + mpl_dbg(sdata, "nonHT sta (%pM) is present\n", + sta->sta.addr); non_ht_sta = true; - goto out; - case NL80211_CHAN_WIDTH_20: - mpl_dbg(sdata, - "mesh_plink %pM: HT20 sta (%pM) is present\n", - sdata->vif.addr, sta->sta.addr); - ht20_sta = true; - default: break; } + + mpl_dbg(sdata, "HT20 sta (%pM) is present\n", sta->sta.addr); + ht20_sta = true; } -out: rcu_read_unlock(); if (non_ht_sta) @@ -147,16 +186,13 @@ out: else ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONE; - if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { - sdata->vif.bss_conf.ht_operation_mode = ht_opmode; - sdata->u.mesh.mshcfg.ht_opmode = ht_opmode; - changed = BSS_CHANGED_HT; - mpl_dbg(sdata, - "mesh_plink %pM: protection mode changed to %d\n", - sdata->vif.addr, ht_opmode); - } + if (sdata->vif.bss_conf.ht_operation_mode == ht_opmode) + return 0; - return changed; + sdata->vif.bss_conf.ht_operation_mode = ht_opmode; + sdata->u.mesh.mshcfg.ht_opmode = ht_opmode; + mpl_dbg(sdata, "selected new HT protection mode %d\n", ht_opmode); + return BSS_CHANGED_HT; } /** @@ -179,6 +215,9 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) sta->plink_state = NL80211_PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_local_status_update(sdata); + return changed; } @@ -189,7 +228,7 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) * * All mesh paths with this peer as next hop will be flushed */ -void mesh_plink_deactivate(struct sta_info *sta) +u32 mesh_plink_deactivate(struct sta_info *sta) { struct 
ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; @@ -202,12 +241,13 @@ void mesh_plink_deactivate(struct sta_info *sta) sta->reason); spin_unlock_bh(&sta->lock); - ieee80211_bss_info_change_notify(sdata, changed); + return changed; } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, - enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason) { + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason) +{ struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_tx_info *info; @@ -258,13 +298,13 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, } if (ieee80211_add_srates_ie(sdata, skb, true, band) || ieee80211_add_ext_srates_ie(sdata, skb, true, band) || - mesh_add_rsn_ie(skb, sdata) || - mesh_add_meshid_ie(skb, sdata) || - mesh_add_meshconf_ie(skb, sdata)) + mesh_add_rsn_ie(sdata, skb) || + mesh_add_meshid_ie(sdata, skb) || + mesh_add_meshconf_ie(sdata, skb)) goto free; } else { /* WLAN_SP_MESH_PEERING_CLOSE */ info->flags |= IEEE80211_TX_CTL_NO_ACK; - if (mesh_add_meshid_ie(skb, sdata)) + if (mesh_add_meshid_ie(sdata, skb)) goto free; } @@ -308,12 +348,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, } if (action != WLAN_SP_MESH_PEERING_CLOSE) { - if (mesh_add_ht_cap_ie(skb, sdata) || - mesh_add_ht_oper_ie(skb, sdata)) + if (mesh_add_ht_cap_ie(sdata, skb) || + mesh_add_ht_oper_ie(sdata, skb)) goto free; } - if (mesh_add_vendor_ies(skb, sdata)) + if (mesh_add_vendor_ies(sdata, skb)) goto free; ieee80211_tx_skb(sdata, skb); @@ -323,92 +363,147 @@ free: return err; } -/** - * mesh_peer_init - initialize new mesh peer and return resulting sta_info - * - * @sdata: local meshif - * @addr: peer's address - * @elems: IEs from beacon or mesh peering frame - * - * call under RCU - */ -static struct sta_info *mesh_peer_init(struct ieee80211_sub_if_data *sdata, - u8 *addr, - struct 
ieee802_11_elems *elems) +static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee802_11_elems *elems, bool insert) { struct ieee80211_local *local = sdata->local; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; - u32 rates, basic_rates = 0; - struct sta_info *sta; - bool insert = false; + u32 rates, basic_rates = 0, changed = 0; sband = local->hw.wiphy->bands[band]; rates = ieee80211_sta_get_rates(local, elems, band, &basic_rates); - sta = sta_info_get(sdata, addr); - if (!sta) { - /* Userspace handles peer allocation when security is enabled */ - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) { - cfg80211_notify_new_peer_candidate(sdata->dev, addr, - elems->ie_start, - elems->total_len, - GFP_ATOMIC); - return NULL; - } - - sta = mesh_plink_alloc(sdata, addr); - if (!sta) - return NULL; - insert = true; - } - spin_lock_bh(&sta->lock); sta->last_rx = jiffies; - if (sta->plink_state == NL80211_PLINK_ESTAB) { - spin_unlock_bh(&sta->lock); - return sta; - } + /* rates and capabilities don't change during peering */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + goto out; + + if (sta->sta.supp_rates[band] != rates) + changed |= IEEE80211_RC_SUPP_RATES_CHANGED; sta->sta.supp_rates[band] = rates; - if (elems->ht_cap_elem && - sdata->vif.bss_conf.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - elems->ht_cap_elem, - &sta->sta.ht_cap); - else - memset(&sta->sta.ht_cap, 0, sizeof(sta->sta.ht_cap)); - - if (elems->ht_operation) { - struct cfg80211_chan_def chandef; - - if (!(elems->ht_operation->ht_param & - IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) - sta->sta.ht_cap.cap &= - ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, - elems->ht_operation, &chandef); - sta->ch_width = chandef.width; + + if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + elems->ht_cap_elem, sta)) 
+ changed |= IEEE80211_RC_BW_CHANGED; + + /* HT peer is operating 20MHz-only */ + if (elems->ht_operation && + !(elems->ht_operation->ht_param & + IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) { + if (sta->sta.bandwidth != IEEE80211_STA_RX_BW_20) + changed |= IEEE80211_RC_BW_CHANGED; + sta->sta.bandwidth = IEEE80211_STA_RX_BW_20; } if (insert) rate_control_rate_init(sta); + else + rate_control_rate_update(local, sband, sta, changed); +out: spin_unlock_bh(&sta->lock); +} + +static struct sta_info * +__mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr) +{ + struct sta_info *sta; - if (insert && sta_info_insert(sta)) + if (sdata->local->num_sta >= MESH_MAX_PLINKS) + return NULL; + + sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); + if (!sta) return NULL; + sta->plink_state = NL80211_PLINK_LISTEN; + init_timer(&sta->plink_timer); + + sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); + + set_sta_flag(sta, WLAN_STA_WME); + + return sta; +} + +static struct sta_info * +mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr, + struct ieee802_11_elems *elems) +{ + struct sta_info *sta = NULL; + + /* Userspace handles peer allocation when security is enabled */ + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) + cfg80211_notify_new_peer_candidate(sdata->dev, addr, + elems->ie_start, + elems->total_len, + GFP_KERNEL); + else + sta = __mesh_sta_info_alloc(sdata, addr); + + return sta; +} + +/* + * mesh_sta_info_get - return mesh sta info entry for @addr. + * + * @sdata: local meshif + * @addr: peer's address + * @elems: IEs from beacon or mesh peering frame. + * + * Return existing or newly allocated sta_info under RCU read lock. + * (re)initialize with given IEs. 
+ */ +static struct sta_info * +mesh_sta_info_get(struct ieee80211_sub_if_data *sdata, + u8 *addr, struct ieee802_11_elems *elems) __acquires(RCU) +{ + struct sta_info *sta = NULL; + + rcu_read_lock(); + sta = sta_info_get(sdata, addr); + if (sta) { + mesh_sta_info_init(sdata, sta, elems, false); + } else { + rcu_read_unlock(); + /* can't run atomic */ + sta = mesh_sta_info_alloc(sdata, addr, elems); + if (!sta) { + rcu_read_lock(); + return NULL; + } + + mesh_sta_info_init(sdata, sta, elems, true); + + if (sta_info_insert_rcu(sta)) + return NULL; + } + return sta; } +/* + * mesh_neighbour_update - update or initialize new mesh neighbor. + * + * @sdata: local meshif + * @addr: peer's address + * @elems: IEs from beacon or mesh peering frame + * + * Initiates peering if appropriate. + */ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *elems) { struct sta_info *sta; + u32 changed = 0; - rcu_read_lock(); - sta = mesh_peer_init(sdata, hw_addr, elems); + sta = mesh_sta_info_get(sdata, hw_addr, elems); if (!sta) goto out; @@ -417,10 +512,12 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.accepting_plinks && sdata->u.mesh.mshcfg.auto_open_plinks && rssi_threshold_check(sta, sdata)) - mesh_plink_open(sta); + changed = mesh_plink_open(sta); + ieee80211_mps_frame_release(sta, elems); out: rcu_read_unlock(); + ieee80211_mbss_info_change_notify(sdata, changed); } static void mesh_plink_timer(unsigned long data) @@ -504,6 +601,13 @@ static void mesh_plink_timer(unsigned long data) #ifdef CONFIG_PM void mesh_plink_quiesce(struct sta_info *sta) { + if (!ieee80211_vif_is_mesh(&sta->sdata->vif)) + return; + + /* no kernel mesh sta timers have been initialized */ + if (sta->sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) + return; + if (del_timer_sync(&sta->plink_timer)) sta->plink_timer_was_running = true; } @@ -526,13 +630,14 @@ static inline void mesh_plink_timer_set(struct sta_info *sta, 
int timeout) add_timer(&sta->plink_timer); } -int mesh_plink_open(struct sta_info *sta) +u32 mesh_plink_open(struct sta_info *sta) { __le16 llid; struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 changed; if (!test_sta_flag(sta, WLAN_STA_AUTH)) - return -EPERM; + return 0; spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); @@ -540,7 +645,7 @@ int mesh_plink_open(struct sta_info *sta) if (sta->plink_state != NL80211_PLINK_LISTEN && sta->plink_state != NL80211_PLINK_BLOCKED) { spin_unlock_bh(&sta->lock); - return -EBUSY; + return 0; } sta->plink_state = NL80211_PLINK_OPN_SNT; mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout); @@ -549,13 +654,16 @@ int mesh_plink_open(struct sta_info *sta) "Mesh plink: starting establishment with %pM\n", sta->sta.addr); - return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, - sta->sta.addr, llid, 0, 0); + /* set the non-peer mode to active during peering */ + changed = ieee80211_mps_local_status_update(sdata); + + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, + sta->sta.addr, llid, 0, 0); + return changed; } -void mesh_plink_block(struct sta_info *sta) +u32 mesh_plink_block(struct sta_info *sta) { - struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; spin_lock_bh(&sta->lock); @@ -563,12 +671,13 @@ void mesh_plink_block(struct sta_info *sta) sta->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->lock); - ieee80211_bss_info_change_notify(sdata, changed); + return changed; } -void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - size_t len, struct ieee80211_rx_status *rx_status) +void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; struct ieee802_11_elems elems; @@ -581,15 +690,6 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m u8 *baseaddr; u32 changed = 0; 
__le16 plid, llid, reason; - static const char *mplstates[] = { - [NL80211_PLINK_LISTEN] = "LISTEN", - [NL80211_PLINK_OPN_SNT] = "OPN-SNT", - [NL80211_PLINK_OPN_RCVD] = "OPN-RCVD", - [NL80211_PLINK_CNF_RCVD] = "CNF_RCVD", - [NL80211_PLINK_ESTAB] = "ESTAB", - [NL80211_PLINK_HOLDING] = "HOLDING", - [NL80211_PLINK_BLOCKED] = "BLOCKED" - }; /* need action_code, aux */ if (len < IEEE80211_MIN_ACTION_SIZE + 3) @@ -609,13 +709,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m baselen += 4; } ieee802_11_parse_elems(baseaddr, len - baselen, &elems); + if (!elems.peering) { mpl_dbg(sdata, "Mesh plink: missing necessary peer link ie\n"); return; } + if (elems.rsn_len && - sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { + sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { mpl_dbg(sdata, "Mesh plink: can't establish link with secure peer\n"); return; @@ -634,7 +736,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } if (ftype != WLAN_SP_MESH_PEERING_CLOSE && - (!elems.mesh_id || !elems.mesh_config)) { + (!elems.mesh_id || !elems.mesh_config)) { mpl_dbg(sdata, "Mesh plink: missing necessary ie\n"); return; } @@ -646,6 +748,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) memcpy(&llid, PLINK_GET_PLID(elems.peering), 2); + /* WARNING: Only for sta pointer, is dropped & re-acquired */ rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -749,8 +852,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } if (event == OPN_ACPT) { + rcu_read_unlock(); /* allocate sta entry if necessary and update info */ - sta = mesh_peer_init(sdata, mgmt->sa, &elems); + sta = mesh_sta_info_get(sdata, mgmt->sa, &elems); if (!sta) { mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); rcu_read_unlock(); @@ -758,11 +862,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct 
ieee80211_m } } - mpl_dbg(sdata, - "Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n", - mgmt->sa, mplstates[sta->plink_state], - le16_to_cpu(sta->llid), le16_to_cpu(sta->plid), - event); + mpl_dbg(sdata, "peer %pM in state %s got event %s\n", mgmt->sa, + mplstates[sta->plink_state], mplevents[event]); reason = 0; spin_lock_bh(&sta->lock); switch (sta->plink_state) { @@ -780,6 +881,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->llid = llid; mesh_plink_timer_set(sta, mshcfg->dot11MeshRetryTimeout); + + /* set the non-peer mode to active during peering */ + changed |= ieee80211_mps_local_status_update(sdata); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, @@ -870,8 +975,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m spin_unlock_bh(&sta->lock); changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, + mshcfg->power_mode); break; default: spin_unlock_bh(&sta->lock); @@ -905,11 +1014,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m spin_unlock_bh(&sta->lock); changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CONFIRM, sta->sta.addr, llid, plid, 0); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, + mshcfg->power_mode); break; default: spin_unlock_bh(&sta->lock); @@ -928,6 +1041,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); 
spin_unlock_bh(&sta->lock); changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, llid, plid, reason); break; @@ -976,5 +1090,5 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m rcu_read_unlock(); if (changed) - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); } diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c new file mode 100644 index 0000000..3b7bfc0 --- /dev/null +++ b/net/mac80211/mesh_ps.c @@ -0,0 +1,598 @@ +/* + * Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de> + * Copyright 2012-2013, cozybit Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "mesh.h" +#include "wme.h" + + +/* mesh PS management */ + +/** + * mps_qos_null_get - create pre-addressed QoS Null frame for mesh powersave + */ +static struct sk_buff *mps_qos_null_get(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *nullfunc; /* use 4addr header */ + struct sk_buff *skb; + int size = sizeof(*nullfunc); + __le16 fc; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + size + 2); + if (!skb) + return NULL; + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr *) skb_put(skb, size); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); + ieee80211_fill_mesh_addresses(nullfunc, &fc, sta->sta.addr, + sdata->vif.addr); + nullfunc->frame_control = fc; + nullfunc->duration_id = 0; + /* no address resolution for this frame -> set addr 1 immediately */ + memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); + memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ + 
ieee80211_mps_set_frame_flags(sdata, sta, nullfunc); + + return skb; +} + +/** + * mps_qos_null_tx - send a QoS Null to indicate link-specific power mode + */ +static void mps_qos_null_tx(struct sta_info *sta) +{ + struct sk_buff *skb; + + skb = mps_qos_null_get(sta); + if (!skb) + return; + + mps_dbg(sta->sdata, "announcing peer-specific power mode to %pM\n", + sta->sta.addr); + + /* don't unintentionally start a MPSP */ + if (!test_sta_flag(sta, WLAN_STA_PS_STA)) { + u8 *qc = ieee80211_get_qos_ctl((void *) skb->data); + + qc[0] |= IEEE80211_QOS_CTL_EOSP; + } + + ieee80211_tx_skb(sta->sdata, skb); +} + +/** + * ieee80211_mps_local_status_update - track status of local link-specific PMs + * + * @sdata: local mesh subif + * + * sets the non-peer power mode and triggers the driver PS (re-)configuration + * Return BSS_CHANGED_BEACON if a beacon update is necessary. + */ +u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct sta_info *sta; + bool peering = false; + int light_sleep_cnt = 0; + int deep_sleep_cnt = 0; + u32 changed = 0; + enum nl80211_mesh_power_mode nonpeer_pm; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { + if (sdata != sta->sdata) + continue; + + switch (sta->plink_state) { + case NL80211_PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_CNF_RCVD: + peering = true; + break; + case NL80211_PLINK_ESTAB: + if (sta->local_pm == NL80211_MESH_POWER_LIGHT_SLEEP) + light_sleep_cnt++; + else if (sta->local_pm == NL80211_MESH_POWER_DEEP_SLEEP) + deep_sleep_cnt++; + break; + default: + break; + } + } + rcu_read_unlock(); + + /* + * Set non-peer mode to active during peering/scanning/authentication + * (see IEEE802.11-2012 13.14.8.3). The non-peer mesh power mode is + * deep sleep if the local STA is in light or deep sleep towards at + * least one mesh peer (see 13.14.3.1). 
Otherwise, set it to the + * user-configured default value. + */ + if (peering) { + mps_dbg(sdata, "setting non-peer PM to active for peering\n"); + nonpeer_pm = NL80211_MESH_POWER_ACTIVE; + } else if (light_sleep_cnt || deep_sleep_cnt) { + mps_dbg(sdata, "setting non-peer PM to deep sleep\n"); + nonpeer_pm = NL80211_MESH_POWER_DEEP_SLEEP; + } else { + mps_dbg(sdata, "setting non-peer PM to user value\n"); + nonpeer_pm = ifmsh->mshcfg.power_mode; + } + + /* need update if sleep counts move between 0 and non-zero */ + if (ifmsh->nonpeer_pm != nonpeer_pm || + !ifmsh->ps_peers_light_sleep != !light_sleep_cnt || + !ifmsh->ps_peers_deep_sleep != !deep_sleep_cnt) + changed = BSS_CHANGED_BEACON; + + ifmsh->nonpeer_pm = nonpeer_pm; + ifmsh->ps_peers_light_sleep = light_sleep_cnt; + ifmsh->ps_peers_deep_sleep = deep_sleep_cnt; + + return changed; +} + +/** + * ieee80211_mps_set_sta_local_pm - set local PM towards a mesh STA + * + * @sta: mesh STA + * @pm: the power mode to set + * Return BSS_CHANGED_BEACON if a beacon update is in order. 
+ */ +u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, + enum nl80211_mesh_power_mode pm) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + + mps_dbg(sdata, "local STA operates in mode %d with %pM\n", + pm, sta->sta.addr); + + sta->local_pm = pm; + + /* + * announce peer-specific power mode transition + * (see IEEE802.11-2012 13.14.3.2 and 13.14.3.3) + */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + mps_qos_null_tx(sta); + + return ieee80211_mps_local_status_update(sdata); +} + +/** + * ieee80211_mps_set_frame_flags - set mesh PS flags in FC (and QoS Control) + * + * @sdata: local mesh subif + * @sta: mesh STA + * @hdr: 802.11 frame header + * + * see IEEE802.11-2012 8.2.4.1.7 and 8.2.4.5.11 + * + * NOTE: sta must be given when an individually-addressed QoS frame header + * is handled, for group-addressed and management frames it is not used + */ +void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + u8 *qc; + + if (WARN_ON(is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control) && + !sta)) + return; + + if (is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control) && + sta->plink_state == NL80211_PLINK_ESTAB) + pm = sta->local_pm; + else + pm = sdata->u.mesh.nonpeer_pm; + + if (pm == NL80211_MESH_POWER_ACTIVE) + hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_PM); + else + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); + + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + qc = ieee80211_get_qos_ctl(hdr); + + if ((is_unicast_ether_addr(hdr->addr1) && + pm == NL80211_MESH_POWER_DEEP_SLEEP) || + (is_multicast_ether_addr(hdr->addr1) && + sdata->u.mesh.ps_peers_deep_sleep > 0)) + qc[1] |= (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8); + else + qc[1] &= ~(IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8); +} + +/** + * ieee80211_mps_sta_status_update - update buffering status of neighbor STA + 
* + * @sta: mesh STA + * + * called after change of peering status or non-peer/peer-specific power mode + */ +void ieee80211_mps_sta_status_update(struct sta_info *sta) +{ + enum nl80211_mesh_power_mode pm; + bool do_buffer; + + /* + * use peer-specific power mode if peering is established and the + * peer's power mode is known + */ + if (sta->plink_state == NL80211_PLINK_ESTAB && + sta->peer_pm != NL80211_MESH_POWER_UNKNOWN) + pm = sta->peer_pm; + else + pm = sta->nonpeer_pm; + + do_buffer = (pm != NL80211_MESH_POWER_ACTIVE); + + /* Don't let the same PS state be set twice */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) == do_buffer) + return; + + if (do_buffer) { + set_sta_flag(sta, WLAN_STA_PS_STA); + atomic_inc(&sta->sdata->u.mesh.ps.num_sta_ps); + mps_dbg(sta->sdata, "start PS buffering frames towards %pM\n", + sta->sta.addr); + } else { + ieee80211_sta_ps_deliver_wakeup(sta); + } + + /* clear the MPSP flags for non-peers or active STA */ + if (sta->plink_state != NL80211_PLINK_ESTAB) { + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + } else if (!do_buffer) { + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + } +} + +static void mps_set_sta_peer_pm(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + u8 *qc = ieee80211_get_qos_ctl(hdr); + + /* + * Test Power Management field of frame control (PW) and + * mesh power save level subfield of QoS control field (PSL) + * + * | PM | PSL| Mesh PM | + * +----+----+---------+ + * | 0 |Rsrv| Active | + * | 1 | 0 | Light | + * | 1 | 1 | Deep | + */ + if (ieee80211_has_pm(hdr->frame_control)) { + if (qc[1] & (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8)) + pm = NL80211_MESH_POWER_DEEP_SLEEP; + else + pm = NL80211_MESH_POWER_LIGHT_SLEEP; + } else { + pm = NL80211_MESH_POWER_ACTIVE; + } + + if (sta->peer_pm == pm) + return; + + mps_dbg(sta->sdata, "STA %pM enters mode %d\n", + sta->sta.addr, pm); + + sta->peer_pm = pm; + + 
ieee80211_mps_sta_status_update(sta); +} + +static void mps_set_sta_nonpeer_pm(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + + if (ieee80211_has_pm(hdr->frame_control)) + pm = NL80211_MESH_POWER_DEEP_SLEEP; + else + pm = NL80211_MESH_POWER_ACTIVE; + + if (sta->nonpeer_pm == pm) + return; + + mps_dbg(sta->sdata, "STA %pM sets non-peer mode to %d\n", + sta->sta.addr, pm); + + sta->nonpeer_pm = pm; + + ieee80211_mps_sta_status_update(sta); +} + +/** + * ieee80211_mps_rx_h_sta_process - frame receive handler for mesh powersave + * + * @sta: STA info that transmitted the frame + * @hdr: IEEE 802.11 (QoS) Header + */ +void ieee80211_mps_rx_h_sta_process(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + if (is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control)) { + /* + * individually addressed QoS Data/Null frames contain + * peer link-specific PS mode towards the local STA + */ + mps_set_sta_peer_pm(sta, hdr); + + /* check for mesh Peer Service Period trigger frames */ + ieee80211_mpsp_trigger_process(ieee80211_get_qos_ctl(hdr), + sta, false, false); + } else { + /* + * can only determine non-peer PS mode + * (see IEEE802.11-2012 8.2.4.1.7) + */ + mps_set_sta_nonpeer_pm(sta, hdr); + } +} + + +/* mesh PS frame release */ + +static void mpsp_trigger_send(struct sta_info *sta, bool rspi, bool eosp) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct sk_buff *skb; + struct ieee80211_hdr *nullfunc; + struct ieee80211_tx_info *info; + u8 *qc; + + skb = mps_qos_null_get(sta); + if (!skb) + return; + + nullfunc = (struct ieee80211_hdr *) skb->data; + if (!eosp) + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + /* + * | RSPI | EOSP | MPSP triggering | + * +------+------+--------------------+ + * | 0 | 0 | local STA is owner | + * | 0 | 1 | no MPSP (MPSP end) | + * | 1 | 0 | both STA are owner | + * | 1 | 1 | peer STA is owner | see IEEE802.11-2012 13.14.9.2 + */ + qc = 
ieee80211_get_qos_ctl(nullfunc); + if (rspi) + qc[1] |= (IEEE80211_QOS_CTL_RSPI >> 8); + if (eosp) + qc[0] |= IEEE80211_QOS_CTL_EOSP; + + info = IEEE80211_SKB_CB(skb); + + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + mps_dbg(sdata, "sending MPSP trigger%s%s to %pM\n", + rspi ? " RSPI" : "", eosp ? " EOSP" : "", sta->sta.addr); + + ieee80211_tx_skb(sdata, skb); +} + +/** + * mpsp_qos_null_append - append QoS Null frame to MPSP skb queue if needed + * + * To properly end a mesh MPSP the last transmitted frame has to set the EOSP + * flag in the QoS Control field. In case the current tailing frame is not a + * QoS Data frame, append a QoS Null to carry the flag. + */ +static void mpsp_qos_null_append(struct sta_info *sta, + struct sk_buff_head *frames) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct sk_buff *new_skb, *skb = skb_peek_tail(frames); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info; + + if (ieee80211_is_data_qos(hdr->frame_control)) + return; + + new_skb = mps_qos_null_get(sta); + if (!new_skb) + return; + + mps_dbg(sdata, "appending QoS Null in MPSP towards %pM\n", + sta->sta.addr); + /* + * This frame has to be transmitted last. Assign lowest priority to + * make sure it cannot pass other frames when releasing multiple ACs. + */ + new_skb->priority = 1; + skb_set_queue_mapping(new_skb, IEEE80211_AC_BK); + ieee80211_set_qos_hdr(sdata, new_skb); + + info = IEEE80211_SKB_CB(new_skb); + info->control.vif = &sdata->vif; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + + __skb_queue_tail(frames, new_skb); +} + +/** + * mps_frame_deliver - transmit frames during mesh powersave + * + * @sta: STA info to transmit to + * @n_frames: number of frames to transmit. 
-1 for all + */ +static void mps_frame_deliver(struct sta_info *sta, int n_frames) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + int ac; + struct sk_buff_head frames; + struct sk_buff *skb; + bool more_data = false; + + skb_queue_head_init(&frames); + + /* collect frame(s) from buffers */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + while (n_frames != 0) { + skb = skb_dequeue(&sta->tx_filtered[ac]); + if (!skb) { + skb = skb_dequeue( + &sta->ps_tx_buf[ac]); + if (skb) + local->total_ps_buffered--; + } + if (!skb) + break; + n_frames--; + __skb_queue_tail(&frames, skb); + } + + if (!skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac])) + more_data = true; + } + + /* nothing to send? -> EOSP */ + if (skb_queue_empty(&frames)) { + mpsp_trigger_send(sta, false, true); + return; + } + + /* in a MPSP make sure the last skb is a QoS Data frame */ + if (test_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mpsp_qos_null_append(sta, &frames); + + mps_dbg(sta->sdata, "sending %d frames to PS STA %pM\n", + skb_queue_len(&frames), sta->sta.addr); + + /* prepare collected frames for transmission */ + skb_queue_walk(&frames, skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *) skb->data; + + /* + * Tell TX path to send this frame even though the + * STA may still remain is PS mode after this frame + * exchange. 
+ */ + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + + if (more_data || !skb_queue_is_last(&frames, skb)) + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + else + hdr->frame_control &= + cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + + if (skb_queue_is_last(&frames, skb) && + ieee80211_is_data_qos(hdr->frame_control)) { + u8 *qoshdr = ieee80211_get_qos_ctl(hdr); + + /* MPSP trigger frame ends service period */ + *qoshdr |= IEEE80211_QOS_CTL_EOSP; + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + } + } + + ieee80211_add_pending_skbs(local, &frames); + sta_info_recalc_tim(sta); +} + +/** + * ieee80211_mpsp_trigger_process - track status of mesh Peer Service Periods + * + * @qc: QoS Control field + * @sta: peer to start a MPSP with + * @tx: frame was transmitted by the local STA + * @acked: frame has been transmitted successfully + * + * NOTE: active mode STA may only serve as MPSP owner + */ +void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta, + bool tx, bool acked) +{ + u8 rspi = qc[1] & (IEEE80211_QOS_CTL_RSPI >> 8); + u8 eosp = qc[0] & IEEE80211_QOS_CTL_EOSP; + + if (tx) { + if (rspi && acked) + set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + + if (eosp) + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + else if (acked && + test_sta_flag(sta, WLAN_STA_PS_STA) && + !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mps_frame_deliver(sta, -1); + } else { + if (eosp) + clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + else if (sta->local_pm != NL80211_MESH_POWER_ACTIVE) + set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + + if (rspi && !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mps_frame_deliver(sta, -1); + } +} + +/** + * ieee80211_mps_frame_release - release buffered frames in response to beacon + * + * @sta: mesh STA + * @elems: beacon IEs + * + * For peers if we have individually-addressed frames buffered or the peer + * indicates buffered frames, send a corresponding MPSP trigger frame. 
Since + * we do not evaluate the awake window duration, QoS Nulls are used as MPSP + * trigger frames. If the neighbour STA is not a peer, only send single frames. + */ +void ieee80211_mps_frame_release(struct sta_info *sta, + struct ieee802_11_elems *elems) +{ + int ac, buffer_local = 0; + bool has_buffered = false; + + /* TIM map only for LLID <= IEEE80211_MAX_AID */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + has_buffered = ieee80211_check_tim(elems->tim, elems->tim_len, + le16_to_cpu(sta->llid) % IEEE80211_MAX_AID); + + if (has_buffered) + mps_dbg(sta->sdata, "%pM indicates buffered frames\n", + sta->sta.addr); + + /* only transmit to PS STA with announced, non-zero awake window */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) && + (!elems->awake_window || !le16_to_cpu(*elems->awake_window))) + return; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + buffer_local += skb_queue_len(&sta->ps_tx_buf[ac]) + + skb_queue_len(&sta->tx_filtered[ac]); + + if (!has_buffered && !buffer_local) + return; + + if (sta->plink_state == NL80211_PLINK_ESTAB) + mpsp_trigger_send(sta, has_buffered, !buffer_local); + else + mps_frame_deliver(sta, 1); +} diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index aa8d1e4..05a256b 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -43,7 +43,7 @@ struct sync_method { static bool mesh_peer_tbtt_adjusting(struct ieee802_11_elems *ie) { return (ie->mesh_config->meshconf_cap & - IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0; + IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0; } void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) @@ -112,7 +112,8 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) { clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); - msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", sta->sta.addr); + msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", + sta->sta.addr); goto no_sync; } @@ -129,18 +130,15 
@@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, sta->t_offset = t_t - t_r; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - s64 t_clockdrift = sta->t_offset_setpoint - - sta->t_offset; + s64 t_clockdrift = sta->t_offset_setpoint - sta->t_offset; msync_dbg(sdata, "STA %pM : sta->t_offset=%lld, sta->t_offset_setpoint=%lld, t_clockdrift=%lld\n", - sta->sta.addr, - (long long) sta->t_offset, - (long long) - sta->t_offset_setpoint, + sta->sta.addr, (long long) sta->t_offset, + (long long) sta->t_offset_setpoint, (long long) t_clockdrift); if (t_clockdrift > TOFFSET_MAXIMUM_ADJUSTMENT || - t_clockdrift < -TOFFSET_MAXIMUM_ADJUSTMENT) { + t_clockdrift < -TOFFSET_MAXIMUM_ADJUSTMENT) { msync_dbg(sdata, "STA %pM : t_clockdrift=%lld too large, setpoint reset\n", sta->sta.addr, @@ -149,15 +147,10 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, goto no_sync; } - rcu_read_unlock(); - spin_lock_bh(&ifmsh->sync_offset_lock); - if (t_clockdrift > - ifmsh->sync_offset_clockdrift_max) - ifmsh->sync_offset_clockdrift_max - = t_clockdrift; + if (t_clockdrift > ifmsh->sync_offset_clockdrift_max) + ifmsh->sync_offset_clockdrift_max = t_clockdrift; spin_unlock_bh(&ifmsh->sync_offset_lock); - } else { sta->t_offset_setpoint = sta->t_offset - TOFFSET_SET_MARGIN; set_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); @@ -165,9 +158,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, "STA %pM : offset was invalid, sta->t_offset=%lld\n", sta->sta.addr, (long long) sta->t_offset); - rcu_read_unlock(); } - return; no_sync: rcu_read_unlock(); @@ -177,14 +168,12 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - WARN_ON(ifmsh->mesh_sp_id - != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); + WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); BUG_ON(!rcu_read_lock_held()); spin_lock_bh(&ifmsh->sync_offset_lock); - if 
(ifmsh->sync_offset_clockdrift_max > - TOFFSET_MINIMUM_ADJUSTMENT) { + if (ifmsh->sync_offset_clockdrift_max > TOFFSET_MINIMUM_ADJUSTMENT) { /* Since ajusting the tsf here would * require a possibly blocking call * to the driver tsf setter, we punt @@ -193,8 +182,7 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) msync_dbg(sdata, "TBTT : kicking off TBTT adjustment with clockdrift_max=%lld\n", ifmsh->sync_offset_clockdrift_max); - set_bit(MESH_WORK_DRIFT_ADJUST, - &ifmsh->wrkq_flags); + set_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags); ifmsh->adjusting_tbtt = true; } else { @@ -220,14 +208,11 @@ static const struct sync_method sync_methods[] = { const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method) { - const struct ieee80211_mesh_sync_ops *ops = NULL; - u8 i; + int i; for (i = 0 ; i < ARRAY_SIZE(sync_methods); ++i) { - if (sync_methods[i].method == method) { - ops = &sync_methods[i].ops; - break; - } + if (sync_methods[i].method == method) + return &sync_methods[i].ops; } - return ops; + return NULL; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7753a9c..346ad4c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -30,11 +30,13 @@ #include "rate.h" #include "led.h" -#define IEEE80211_AUTH_TIMEOUT (HZ / 5) -#define IEEE80211_AUTH_MAX_TRIES 3 -#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) -#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) -#define IEEE80211_ASSOC_MAX_TRIES 3 +#define IEEE80211_AUTH_TIMEOUT (HZ / 5) +#define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10) +#define IEEE80211_AUTH_MAX_TRIES 3 +#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) +#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) +#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) +#define IEEE80211_ASSOC_MAX_TRIES 3 static int max_nullfunc_tries = 2; module_param(max_nullfunc_tries, int, 0644); @@ -112,6 +114,9 @@ enum rx_mgmt_action { /* caller must call cfg80211_send_assoc_timeout() */ RX_MGMT_CFG80211_ASSOC_TIMEOUT, + + /* used when a 
processed beacon causes a deauth */ + RX_MGMT_CFG80211_TX_DEAUTH, }; /* utils */ @@ -172,79 +177,331 @@ static int ecw2cw(int ecw) return (1 << ecw) - 1; } -static u32 ieee80211_config_ht_tx(struct ieee80211_sub_if_data *sdata, - struct ieee80211_ht_operation *ht_oper, - const u8 *bssid, bool reconfig) +static u32 chandef_downgrade(struct cfg80211_chan_def *c) +{ + u32 ret; + int tmp; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + c->width = NL80211_CHAN_WIDTH_20_NOHT; + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_40: + c->width = NL80211_CHAN_WIDTH_20; + c->center_freq1 = c->chan->center_freq; + ret = IEEE80211_STA_DISABLE_40MHZ | + IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_80: + tmp = (30 + c->chan->center_freq - c->center_freq1)/20; + /* n_P40 */ + tmp /= 2; + /* freq_P40 */ + c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; + c->width = NL80211_CHAN_WIDTH_40; + ret = IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_80P80: + c->center_freq2 = 0; + c->width = NL80211_CHAN_WIDTH_80; + ret = IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ; + break; + case NL80211_CHAN_WIDTH_160: + /* n_P20 */ + tmp = (70 + c->chan->center_freq - c->center_freq1)/20; + /* n_P80 */ + tmp /= 4; + c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; + c->width = NL80211_CHAN_WIDTH_80; + ret = IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ; + break; + default: + case NL80211_CHAN_WIDTH_20_NOHT: + WARN_ON_ONCE(1); + c->width = NL80211_CHAN_WIDTH_20_NOHT; + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; + } + + WARN_ON_ONCE(!cfg80211_chandef_valid(c)); + + return ret; +} + +static u32 +ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + struct ieee80211_channel *channel, + const struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_vht_operation *vht_oper, + struct 
cfg80211_chan_def *chandef, bool verbose) +{ + struct cfg80211_chan_def vht_chandef; + u32 ht_cfreq, ret; + + chandef->chan = channel; + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = channel->center_freq; + chandef->center_freq2 = 0; + + if (!ht_oper || !sband->ht_cap.ht_supported) { + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + goto out; + } + + chandef->width = NL80211_CHAN_WIDTH_20; + + ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, + channel->band); + /* check that channel matches the right operating channel */ + if (channel->center_freq != ht_cfreq) { + /* + * It's possible that some APs are confused here; + * Netgear WNDR3700 sometimes reports 4 higher than + * the actual channel in association responses, but + * since we look at probe response/beacon data here + * it should be OK. + */ + if (verbose) + sdata_info(sdata, + "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", + channel->center_freq, ht_cfreq, + ht_oper->primary_chan, channel->band); + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + goto out; + } + + /* check 40 MHz support, if we have it */ + if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { + switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 += 10; + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 -= 10; + break; + } + } else { + /* 40 MHz (and 80 MHz) must be supported for VHT */ + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + if (!vht_oper || !sband->vht_cap.vht_supported) { + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + vht_chandef.chan = channel; + vht_chandef.center_freq1 = + ieee80211_channel_to_frequency(vht_oper->center_freq_seg1_idx, + channel->band); + vht_chandef.center_freq2 = 0; + + if 
(vht_oper->center_freq_seg2_idx) + vht_chandef.center_freq2 = + ieee80211_channel_to_frequency( + vht_oper->center_freq_seg2_idx, + channel->band); + + switch (vht_oper->chan_width) { + case IEEE80211_VHT_CHANWIDTH_USE_HT: + vht_chandef.width = chandef->width; + break; + case IEEE80211_VHT_CHANWIDTH_80MHZ: + vht_chandef.width = NL80211_CHAN_WIDTH_80; + break; + case IEEE80211_VHT_CHANWIDTH_160MHZ: + vht_chandef.width = NL80211_CHAN_WIDTH_160; + break; + case IEEE80211_VHT_CHANWIDTH_80P80MHZ: + vht_chandef.width = NL80211_CHAN_WIDTH_80P80; + break; + default: + if (verbose) + sdata_info(sdata, + "AP VHT operation IE has invalid channel width (%d), disable VHT\n", + vht_oper->chan_width); + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + if (!cfg80211_chandef_valid(&vht_chandef)) { + if (verbose) + sdata_info(sdata, + "AP VHT information is invalid, disable VHT\n"); + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + if (cfg80211_chandef_identical(chandef, &vht_chandef)) { + ret = 0; + goto out; + } + + if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { + if (verbose) + sdata_info(sdata, + "AP VHT information doesn't match HT, disable VHT\n"); + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + *chandef = vht_chandef; + + ret = 0; + +out: + /* don't print the message below for VHT mismatch if VHT is disabled */ + if (ret & IEEE80211_STA_DISABLE_VHT) + vht_chandef = *chandef; + + while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + IEEE80211_CHAN_DISABLED)) { + if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { + ret = IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_VHT; + goto out; + } + + ret |= chandef_downgrade(chandef); + } + + if (chandef->width != vht_chandef.width && verbose) + sdata_info(sdata, + "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); + + WARN_ON_ONCE(!cfg80211_chandef_valid(chandef)); + return ret; +} + +static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, 
+ struct sta_info *sta, + const struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_vht_operation *vht_oper, + const u8 *bssid, u32 *changed) { struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_supported_band *sband; - struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; - struct sta_info *sta; - u32 changed = 0; + struct cfg80211_chan_def chandef; u16 ht_opmode; - bool disable_40 = false; + u32 flags; + enum ieee80211_sta_rx_bandwidth new_sta_bw; + int ret; - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (WARN_ON(!chanctx_conf)) { - rcu_read_unlock(); + /* if HT was/is disabled, don't track any bandwidth changes */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_HT || !ht_oper) return 0; - } - chan = chanctx_conf->def.chan; - rcu_read_unlock(); + + /* don't check VHT if we associated as non-VHT station */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) + vht_oper = NULL; + + if (WARN_ON_ONCE(!sta)) + return -EINVAL; + + chan = sdata->vif.bss_conf.chandef.chan; sband = local->hw.wiphy->bands[chan->band]; - switch (sdata->vif.bss_conf.chandef.width) { + /* calculate new channel (type) based on HT/VHT operation IEs */ + flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper, + vht_oper, &chandef, false); + + /* + * Downgrade the new channel if we associated with restricted + * capabilities. For example, if we associated as a 20 MHz STA + * to a 40 MHz AP (due to regulatory, capabilities or config + * reasons) then switching to a 40 MHz channel now won't do us + * any good -- we couldn't use it with the AP. 
+ */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ && + chandef.width == NL80211_CHAN_WIDTH_80P80) + flags |= chandef_downgrade(&chandef); + if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ && + chandef.width == NL80211_CHAN_WIDTH_160) + flags |= chandef_downgrade(&chandef); + if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ && + chandef.width > NL80211_CHAN_WIDTH_20) + flags |= chandef_downgrade(&chandef); + + if (cfg80211_chandef_identical(&chandef, &sdata->vif.bss_conf.chandef)) + return 0; + + sdata_info(sdata, + "AP %pM changed bandwidth, new config is %d MHz, width %d (%d/%d MHz)\n", + ifmgd->bssid, chandef.chan->center_freq, chandef.width, + chandef.center_freq1, chandef.center_freq2); + + if (flags != (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_VHT | + IEEE80211_STA_DISABLE_40MHZ | + IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ)) || + !cfg80211_chandef_valid(&chandef)) { + sdata_info(sdata, + "AP %pM changed bandwidth in a way we can't support - disconnect\n", + ifmgd->bssid); + return -EINVAL; + } + + switch (chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + new_sta_bw = IEEE80211_STA_RX_BW_20; + break; case NL80211_CHAN_WIDTH_40: - if (sdata->vif.bss_conf.chandef.chan->center_freq > - sdata->vif.bss_conf.chandef.center_freq1 && - chan->flags & IEEE80211_CHAN_NO_HT40PLUS) - disable_40 = true; - if (sdata->vif.bss_conf.chandef.chan->center_freq < - sdata->vif.bss_conf.chandef.center_freq1 && - chan->flags & IEEE80211_CHAN_NO_HT40MINUS) - disable_40 = true; + new_sta_bw = IEEE80211_STA_RX_BW_40; break; - default: + case NL80211_CHAN_WIDTH_80: + new_sta_bw = IEEE80211_STA_RX_BW_80; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + new_sta_bw = IEEE80211_STA_RX_BW_160; break; + default: + return -EINVAL; } - /* This can change during the lifetime of the BSS */ - if (!(ht_oper->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) - disable_40 = true; + if 
(new_sta_bw > sta->cur_max_bandwidth) + new_sta_bw = sta->cur_max_bandwidth; - mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, bssid); - - WARN_ON_ONCE(!sta); - - if (sta && !sta->supports_40mhz) - disable_40 = true; - - if (sta && (!reconfig || - (disable_40 != !(sta->sta.ht_cap.cap & - IEEE80211_HT_CAP_SUP_WIDTH_20_40)))) { + if (new_sta_bw < sta->sta.bandwidth) { + sta->sta.bandwidth = new_sta_bw; + rate_control_rate_update(local, sband, sta, + IEEE80211_RC_BW_CHANGED); + } - if (disable_40) - sta->sta.ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - else - sta->sta.ht_cap.cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + ret = ieee80211_vif_change_bandwidth(sdata, &chandef, changed); + if (ret) { + sdata_info(sdata, + "AP %pM changed bandwidth to incompatible one - disconnect\n", + ifmgd->bssid); + return ret; + } + if (new_sta_bw > sta->sta.bandwidth) { + sta->sta.bandwidth = new_sta_bw; rate_control_rate_update(local, sband, sta, IEEE80211_RC_BW_CHANGED); } - mutex_unlock(&local->sta_mtx); ht_opmode = le16_to_cpu(ht_oper->operation_mode); /* if bss configuration changed store the new one */ - if (!reconfig || (sdata->vif.bss_conf.ht_operation_mode != ht_opmode)) { - changed |= BSS_CHANGED_HT; + if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { + *changed |= BSS_CHANGED_HT; sdata->vif.bss_conf.ht_operation_mode = ht_opmode; } - return changed; + return 0; } /* frame sending functions */ @@ -341,11 +598,13 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_supported_band *sband) + struct ieee80211_supported_band *sband, + struct ieee80211_vht_cap *ap_vht_cap) { u8 *pos; u32 cap; struct ieee80211_sta_vht_cap vht_cap; + int i; BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap)); @@ -364,6 +623,45 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; } 
+ /* + * Some APs apparently get confused if our capabilities are better + * than theirs, so restrict what we advertise in the assoc request. + */ + if (!(ap_vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE))) + cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + + if (!(ap_vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_TXSTBC))) + cap &= ~(IEEE80211_VHT_CAP_RXSTBC_1 | + IEEE80211_VHT_CAP_RXSTBC_3 | + IEEE80211_VHT_CAP_RXSTBC_4); + + for (i = 0; i < 8; i++) { + int shift = i * 2; + u16 mask = IEEE80211_VHT_MCS_NOT_SUPPORTED << shift; + u16 ap_mcs, our_mcs; + + ap_mcs = (le16_to_cpu(ap_vht_cap->supp_mcs.tx_mcs_map) & + mask) >> shift; + our_mcs = (le16_to_cpu(vht_cap.vht_mcs.rx_mcs_map) & + mask) >> shift; + + if (our_mcs == IEEE80211_VHT_MCS_NOT_SUPPORTED) + continue; + + switch (ap_mcs) { + default: + if (our_mcs <= ap_mcs) + break; + /* fall through */ + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + vht_cap.vht_mcs.rx_mcs_map &= cpu_to_le16(~mask); + vht_cap.vht_mcs.rx_mcs_map |= + cpu_to_le16(ap_mcs << shift); + } + } + /* reserve and fill IE */ pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, cap); @@ -562,7 +860,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) sband, chan, sdata->smps_mode); if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) - ieee80211_add_vht_ie(sdata, skb, sband); + ieee80211_add_vht_ie(sdata, skb, sband, + &assoc_data->ap_vht_cap); /* if present, add any custom non-vendor IEs that go after HT */ if (assoc_data->ie_len && assoc_data->ie) { @@ -605,6 +904,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) drv_mgd_prepare_tx(local, sdata); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_tx_skb(sdata, skb); } @@ -641,7 +943,8 @@ 
void ieee80211_send_nullfunc(struct ieee80211_local *local, if (powersave) nullfunc->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE; @@ -745,10 +1048,10 @@ static void ieee80211_chswitch_timer(unsigned long data) ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp) +void +ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss, u64 timestamp) { struct cfg80211_bss *cbss = container_of((void *)bss, struct cfg80211_bss, priv); @@ -907,39 +1210,6 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, return 0; } -void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; - struct ieee80211_conf *conf = &local->hw.conf; - - WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || - !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || - (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); - - local->disable_dynamic_ps = false; - conf->dynamic_ps_timeout = local->dynamic_ps_user_timeout; -} -EXPORT_SYMBOL(ieee80211_enable_dyn_ps); - -void ieee80211_disable_dyn_ps(struct ieee80211_vif *vif) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; - struct ieee80211_conf *conf = &local->hw.conf; - - WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || - !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || - (local->hw.flags & 
IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); - - local->disable_dynamic_ps = true; - conf->dynamic_ps_timeout = 0; - del_timer_sync(&local->dynamic_ps_timer); - ieee80211_queue_work(&local->hw, - &local->dynamic_ps_enable_work); -} -EXPORT_SYMBOL(ieee80211_disable_dyn_ps); - /* powersave */ static void ieee80211_enable_ps(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) @@ -1042,7 +1312,6 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) } if (count == 1 && ieee80211_powersave_allowed(found)) { - struct ieee80211_conf *conf = &local->hw.conf; s32 beaconint_us; if (latency < 0) @@ -1066,20 +1335,13 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) else timeout = 100; } - local->dynamic_ps_user_timeout = timeout; - if (!local->disable_dynamic_ps) - conf->dynamic_ps_timeout = - local->dynamic_ps_user_timeout; + local->hw.conf.dynamic_ps_timeout = timeout; if (beaconint_us > latency) { local->ps_sdata = NULL; } else { - struct ieee80211_bss *bss; int maxslp = 1; - u8 dtimper; - - bss = (void *)found->u.mgd.associated->priv; - dtimper = bss->dtim_period; + u8 dtimper = found->u.mgd.dtim_period; /* If the TIM IE is invalid, pretend the value is 1 */ if (!dtimper) @@ -1143,8 +1405,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - if (!local->disable_dynamic_ps && - local->hw.conf.dynamic_ps_timeout > 0) { + if (local->hw.conf.dynamic_ps_timeout > 0) { /* don't enter PS if TX frames are pending */ if (drv_tx_frames_pending(local)) { mod_timer(&local->dynamic_ps_timer, jiffies + @@ -1209,16 +1470,30 @@ void ieee80211_dynamic_ps_timer(unsigned long data) ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); } +void ieee80211_dfs_cac_timer_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct ieee80211_sub_if_data *sdata = + container_of(delayed_work, struct 
ieee80211_sub_if_data, + dfs_cac_timer_work); + + ieee80211_vif_release_channel(sdata); + + cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_FINISHED, GFP_KERNEL); +} + /* MLME */ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - u8 *wmm_param, size_t wmm_param_len) + const u8 *wmm_param, size_t wmm_param_len) { struct ieee80211_tx_queue_params params; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t left; int count; - u8 *pos, uapsd_queues = 0; + const u8 *pos; + u8 uapsd_queues = 0; if (!local->ops->conf_tx) return false; @@ -1410,10 +1685,18 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) - bss_conf->dtim_period = bss->dtim_period; - else + if (sdata->u.mgd.assoc_data->have_beacon) { + /* + * If the AP is buggy we may get here with no DTIM period + * known, so assume it's 1 which is the only safe assumption + * in that case, although if the TIM IE is broken powersave + * probably just won't work at all. 
+ */ + bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1; + bss_info_changed |= BSS_CHANGED_DTIM_PERIOD; + } else { bss_conf->dtim_period = 0; + } bss_conf->assoc = 1; @@ -1423,10 +1706,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= BSS_CHANGED_CQM; /* Enable ARP filtering */ - if (bss_conf->arp_filter_enabled != sdata->arp_filter_state) { - bss_conf->arp_filter_enabled = sdata->arp_filter_state; + if (bss_conf->arp_addr_cnt) bss_info_changed |= BSS_CHANGED_ARP_FILTER; - } ieee80211_bss_info_change_notify(sdata, bss_info_changed); @@ -1447,7 +1728,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - struct sta_info *sta; u32 changed = 0; ASSERT_MGD_MTX(ifmgd); @@ -1479,14 +1759,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, ifmgd->bssid); - if (sta) { - set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, false); - } - mutex_unlock(&local->sta_mtx); - /* * if we want to get out of ps before disassoc (why?) 
we have * to do it before sending disassoc, as otherwise the null-packet @@ -1518,7 +1790,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(ifmgd->bssid, 0, ETH_ALEN); /* remove AP and TDLS peers */ - sta_info_flush(local, sdata); + sta_info_flush_defer(sdata); /* finally reset all BSS / config parameters */ changed |= ieee80211_reset_erp_info(sdata); @@ -1540,10 +1812,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&local->dynamic_ps_enable_work); /* Disable ARP filtering */ - if (sdata->vif.bss_conf.arp_filter_enabled) { - sdata->vif.bss_conf.arp_filter_enabled = false; + if (sdata->vif.bss_conf.arp_addr_cnt) changed |= BSS_CHANGED_ARP_FILTER; - } sdata->vif.bss_conf.qos = false; changed |= BSS_CHANGED_QOS; @@ -1562,6 +1832,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.timers_running = 0; + sdata->vif.bss_conf.dtim_period = 0; + ifmgd->flags = 0; ieee80211_vif_release_channel(sdata); } @@ -1624,17 +1896,18 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, if (!ieee80211_is_data(hdr->frame_control)) return; - if (ack) - ieee80211_sta_reset_conn_monitor(sdata); - if (ieee80211_is_nullfunc(hdr->frame_control) && sdata->u.mgd.probe_send_count > 0) { if (ack) - sdata->u.mgd.probe_send_count = 0; + ieee80211_sta_reset_conn_monitor(sdata); else sdata->u.mgd.nullfunc_failed = true; ieee80211_queue_work(&sdata->local->hw, &sdata->work); + return; } + + if (ack) + ieee80211_sta_reset_conn_monitor(sdata); } static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) @@ -1675,7 +1948,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ssid_len = ssid[1]; ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid_len, NULL, - 0, (u32) -1, true, false, + 0, (u32) -1, true, 0, ifmgd->associated->channel, false); rcu_read_unlock(); } @@ -1709,7 +1982,7 @@ static void ieee80211_mgd_probe_ap(struct 
ieee80211_sub_if_data *sdata, if (beacon) mlme_dbg_ratelimited(sdata, - "detected beacon loss from AP - sending probe request\n"); + "detected beacon loss from AP - probing\n"); ieee80211_cqm_rssi_notify(&sdata->vif, NL80211_CQM_RSSI_BEACON_LOSS_EVENT, GFP_KERNEL); @@ -1790,11 +2063,9 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_ap_probereq_get); -static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, - bool transmit_frame) +static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; mutex_lock(&ifmgd->mtx); @@ -1805,8 +2076,10 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - transmit_frame, frame_buf); + true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&ifmgd->mtx); /* @@ -1814,10 +2087,6 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, * but that's not a problem. 
*/ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } static void ieee80211_beacon_connection_loss_work(struct work_struct *work) @@ -1836,10 +2105,10 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work) rcu_read_unlock(); } - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) { + if (ifmgd->connection_loss) { sdata_info(sdata, "Connection to AP %pM lost\n", ifmgd->bssid); - __ieee80211_disconnect(sdata, false); + __ieee80211_disconnect(sdata); } else { ieee80211_mgd_probe_ap(sdata, true); } @@ -1851,9 +2120,7 @@ static void ieee80211_csa_connection_drop_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.csa_connection_drop_work); - ieee80211_wake_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - __ieee80211_disconnect(sdata, true); + __ieee80211_disconnect(sdata); } void ieee80211_beacon_loss(struct ieee80211_vif *vif) @@ -1864,6 +2131,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif) trace_api_beacon_loss(sdata); WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR); + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_beacon_loss); @@ -1875,7 +2143,7 @@ void ieee80211_connection_loss(struct ieee80211_vif *vif) trace_api_connection_loss(sdata); - WARN_ON(!(hw->flags & IEEE80211_HW_CONNECTION_MONITOR)); + sdata->u.mgd.connection_loss = true; ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_connection_loss); @@ -1897,7 +2165,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, ieee80211_vif_release_channel(sdata); } - cfg80211_put_bss(auth_data->bss); + cfg80211_put_bss(sdata->local->hw.wiphy, auth_data->bss); kfree(auth_data); sdata->u.mgd.auth_data = NULL; } @@ -1905,9 +2173,11 @@ static void 
ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { + struct ieee80211_local *local = sdata->local; struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; u8 *pos; struct ieee802_11_elems elems; + u32 tx_flags = 0; pos = mgmt->u.auth.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); @@ -1915,11 +2185,14 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, return; auth_data->expected_transaction = 4; drv_mgd_prepare_tx(sdata->local, sdata); + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0, elems.challenge - 2, elems.challenge_len + 2, auth_data->bss->bssid, auth_data->bss->bssid, auth_data->key, auth_data->key_len, - auth_data->key_idx); + auth_data->key_idx, tx_flags); } static enum rx_mgmt_action __must_check @@ -1986,6 +2259,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; + ifmgd->auth_data->timeout_started = true; run_again(ifmgd, ifmgd->auth_data->timeout); if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && @@ -2044,10 +2318,6 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - return RX_MGMT_CFG80211_DEAUTH; } @@ -2075,10 +2345,6 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - return RX_MGMT_CFG80211_DISASSOC; } @@ -2188,6 +2454,24 
@@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ifmgd->aid = aid; + /* + * We previously checked these in the beacon/probe response, so + * they should be present here. This is just a safety net. + */ + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && + (!elems.wmm_param || !elems.ht_cap_elem || !elems.ht_operation)) { + sdata_info(sdata, + "HT AP is missing WMM params or HT capability/operation in AssocResp\n"); + return false; + } + + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && + (!elems.vht_cap_elem || !elems.vht_operation)) { + sdata_info(sdata, + "VHT AP is missing VHT capability/operation in AssocResp\n"); + return false; + } + mutex_lock(&sdata->local->sta_mtx); /* * station info was already allocated and inserted before @@ -2201,17 +2485,36 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)]; + /* Set up internal HT/VHT capabilities */ if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - elems.ht_cap_elem, &sta->sta.ht_cap); - - sta->supports_40mhz = - sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40; + elems.ht_cap_elem, sta); if (elems.vht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - elems.vht_cap_elem, - &sta->sta.vht_cap); + elems.vht_cap_elem, sta); + + /* + * Some APs, e.g. Netgear WNDR3700, report invalid HT operation data + * in their association response, so ignore that data for our own + * configuration. If it changed since the last beacon, we'll get the + * next beacon and update then. + */ + + /* + * If an operating mode notification IE is present, override the + * NSS calculation (that would be done in rate_control_rate_init()) + * and use the # of streams from that element. 
+ */ + if (elems.opmode_notif && + !(*elems.opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)) { + u8 nss; + + nss = *elems.opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; + nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; + nss += 1; + sta->sta.rx_nss = nss; + } rate_control_rate_init(sta); @@ -2221,9 +2524,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (elems.wmm_param) set_sta_flag(sta, WLAN_STA_WME); - err = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (!err) - err = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + err = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) err = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); if (err) { @@ -2252,11 +2553,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata, false); changed |= BSS_CHANGED_QOS; - if (elems.ht_operation && elems.wmm_param && - !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) - changed |= ieee80211_config_ht_tx(sdata, elems.ht_operation, - cbss->bssid, false); - /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; @@ -2330,6 +2626,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n", mgmt->sa, tu, ms); assoc_data->timeout = jiffies + msecs_to_jiffies(ms); + assoc_data->timeout_started = true; if (ms > IEEE80211_ASSOC_TIMEOUT) run_again(ifmgd, assoc_data->timeout); return RX_MGMT_NONE; @@ -2345,7 +2642,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); - cfg80211_put_bss(*bss); + cfg80211_put_bss(sdata->local->hw.wiphy, *bss); return RX_MGMT_CFG80211_ASSOC_TIMEOUT; } sdata_info(sdata, "associated\n"); @@ -2364,8 +2661,7 @@ 
ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, - struct ieee802_11_elems *elems, - bool beacon) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; int freq; @@ -2373,11 +2669,18 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; bool need_ps = false; - if (sdata->u.mgd.associated && - ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) { - bss = (void *)sdata->u.mgd.associated->priv; + if ((sdata->u.mgd.associated && + ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || + (sdata->u.mgd.assoc_data && + ether_addr_equal(mgmt->bssid, + sdata->u.mgd.assoc_data->bss->bssid))) { /* not previously set so we may need to recalc */ - need_ps = !bss->dtim_period; + need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period; + + if (elems->tim && !elems->parse_error) { + const struct ieee80211_tim_ie *tim_ie = elems->tim; + sdata->u.mgd.dtim_period = tim_ie->dtim_period; + } } if (elems->ds_params && elems->ds_params_len == 1) @@ -2392,7 +2695,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, return; bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, - channel, beacon); + channel); if (bss) ieee80211_rx_bss_put(local, bss); @@ -2435,7 +2738,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); if (ifmgd->associated && ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) @@ -2447,6 +2750,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "direct probe responded\n"); 
ifmgd->auth_data->tries = 0; ifmgd->auth_data->timeout = jiffies; + ifmgd->auth_data->timeout_started = true; run_again(ifmgd, ifmgd->auth_data->timeout); } } @@ -2472,10 +2776,10 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_HT_CAPABILITY) | (1ULL << WLAN_EID_HT_OPERATION); -static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) +static enum rx_mgmt_action +ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + u8 *deauth_buf, struct ieee80211_rx_status *rx_status) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; @@ -2484,6 +2788,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; + struct sta_info *sta; u32 changed = 0; bool erp_valid; u8 erp_value = 0; @@ -2495,40 +2800,51 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; if (baselen > len) - return; + return RX_MGMT_NONE; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); - return; + return RX_MGMT_NONE; } if (rx_status->freq != chanctx_conf->def.chan->center_freq) { rcu_read_unlock(); - return; + return RX_MGMT_NONE; } chan = chanctx_conf->def.chan; rcu_read_unlock(); - if (ifmgd->assoc_data && !ifmgd->assoc_data->have_beacon && + if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) { ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, - false); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); 
ifmgd->assoc_data->have_beacon = true; - ifmgd->assoc_data->sent_assoc = false; + ifmgd->assoc_data->need_beacon = false; + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = + le64_to_cpu(mgmt->u.beacon.timestamp); + sdata->vif.bss_conf.sync_device_ts = + rx_status->device_timestamp; + if (elems.tim) + sdata->vif.bss_conf.sync_dtim_count = + elems.tim->dtim_count; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } /* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; + ifmgd->assoc_data->timeout_started = true; run_again(ifmgd, ifmgd->assoc_data->timeout); - return; + return RX_MGMT_NONE; } if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return; + return RX_MGMT_NONE; bssid = ifmgd->associated->bssid; /* Track average RSSI from the Beacon frames of the current AP */ @@ -2559,12 +2875,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (sig > ifmgd->rssi_max_thold && (last_sig <= ifmgd->rssi_min_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, RSSI_EVENT_HIGH); + drv_rssi_callback(local, sdata, RSSI_EVENT_HIGH); } else if (sig < ifmgd->rssi_min_thold && (last_sig >= ifmgd->rssi_max_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, RSSI_EVENT_LOW); + drv_rssi_callback(local, sdata, RSSI_EVENT_LOW); } } @@ -2594,7 +2910,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) { mlme_dbg_ratelimited(sdata, - "cancelling probereq poll due to a received beacon\n"); + "cancelling AP probe due to a received beacon\n"); mutex_lock(&local->mtx); ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL; ieee80211_run_deferred_scan(local); @@ -2666,17 +2982,42 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) - return; + return 
RX_MGMT_NONE; ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, - true); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); if (ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, elems.wmm_param_len)) changed |= BSS_CHANGED_QOS; + /* + * If we haven't had a beacon before, tell the driver about the + * DTIM period (and beacon timing if desired) now. + */ + if (!bss_conf->dtim_period) { + /* a few bogus AP send dtim_period = 0 or no TIM IE */ + if (elems.tim) + bss_conf->dtim_period = elems.tim->dtim_period ?: 1; + else + bss_conf->dtim_period = 1; + + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = + le64_to_cpu(mgmt->u.beacon.timestamp); + sdata->vif.bss_conf.sync_device_ts = + rx_status->device_timestamp; + if (elems.tim) + sdata->vif.bss_conf.sync_dtim_count = + elems.tim->dtim_count; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } + + changed |= BSS_CHANGED_DTIM_PERIOD; + } + if (elems.erp_info && elems.erp_info_len >= 1) { erp_valid = true; erp_value = elems.erp_info[0]; @@ -2687,11 +3028,22 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le16_to_cpu(mgmt->u.beacon.capab_info), erp_valid, erp_value); + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, bssid); - if (elems.ht_cap_elem && elems.ht_operation && elems.wmm_param && - !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) - changed |= ieee80211_config_ht_tx(sdata, elems.ht_operation, - bssid, true); + if (ieee80211_config_bw(sdata, sta, elems.ht_operation, + elems.vht_operation, bssid, &changed)) { + mutex_unlock(&local->sta_mtx); + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_DEAUTH_LEAVING, + true, deauth_buf); + return RX_MGMT_CFG80211_TX_DEAUTH; + } + + if (sta && elems.opmode_notif) + ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif, + rx_status->band, true); + mutex_unlock(&local->sta_mtx); if 
(elems.country_elem && elems.pwr_constr_elem && mgmt->u.probe_resp.capab_info & @@ -2702,6 +3054,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.pwr_constr_elem); ieee80211_bss_info_change_notify(sdata, changed); + + return RX_MGMT_NONE; } void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -2712,6 +3066,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; struct cfg80211_bss *bss = NULL; enum rx_mgmt_action rma = RX_MGMT_NONE; + u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; u16 fc; rx_status = (struct ieee80211_rx_status *) skb->cb; @@ -2722,7 +3077,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); + rma = ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, + deauth_buf, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, skb); @@ -2771,6 +3127,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case RX_MGMT_CFG80211_ASSOC_TIMEOUT: cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); break; + case RX_MGMT_CFG80211_TX_DEAUTH: + cfg80211_send_deauth(sdata->dev, deauth_buf, + sizeof(deauth_buf)); + break; default: WARN(1, "unexpected: %d", rma); } @@ -2792,14 +3152,13 @@ static void ieee80211_sta_timer(unsigned long data) } static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 reason) + u8 *bssid, u8 reason, bool tx) { - struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, - false, frame_buf); + tx, frame_buf); mutex_unlock(&ifmgd->mtx); /* @@ -2808,10 +3167,6 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, */ 
cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - mutex_lock(&ifmgd->mtx); } @@ -2820,12 +3175,17 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data; + u32 tx_flags = 0; lockdep_assert_held(&ifmgd->mtx); if (WARN_ON_ONCE(!auth_data)) return -EINVAL; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; + auth_data->tries++; if (auth_data->tries > IEEE80211_AUTH_MAX_TRIES) { @@ -2862,7 +3222,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) ieee80211_send_auth(sdata, trans, auth_data->algorithm, status, auth_data->data, auth_data->data_len, auth_data->bss->bssid, - auth_data->bss->bssid, NULL, 0, 0); + auth_data->bss->bssid, NULL, 0, 0, + tx_flags); } else { const u8 *ssidie; @@ -2881,13 +3242,18 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) * will not answer to direct packet in unassociated state. 
*/ ieee80211_send_probe_req(sdata, NULL, ssidie + 2, ssidie[1], - NULL, 0, (u32) -1, true, false, + NULL, 0, (u32) -1, true, tx_flags, auth_data->bss->channel, false); rcu_read_unlock(); } - auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - run_again(ifmgd, auth_data->timeout); + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; + ifmgd->auth_data->timeout_started = true; + run_again(ifmgd, auth_data->timeout); + } else { + auth_data->timeout_started = false; + } return 0; } @@ -2918,12 +3284,29 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) IEEE80211_ASSOC_MAX_TRIES); ieee80211_send_assoc(sdata); - assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; - run_again(&sdata->u.mgd, assoc_data->timeout); + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; + assoc_data->timeout_started = true; + run_again(&sdata->u.mgd, assoc_data->timeout); + } else { + assoc_data->timeout_started = false; + } return 0; } +void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, + __le16 fc, bool acked) +{ + struct ieee80211_local *local = sdata->local; + + sdata->u.mgd.status_fc = fc; + sdata->u.mgd.status_acked = acked; + sdata->u.mgd.status_received = true; + + ieee80211_queue_work(&local->hw, &sdata->work); +} + void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -2931,7 +3314,36 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) mutex_lock(&ifmgd->mtx); - if (ifmgd->auth_data && + if (ifmgd->status_received) { + __le16 fc = ifmgd->status_fc; + bool status_acked = ifmgd->status_acked; + + ifmgd->status_received = false; + if (ifmgd->auth_data && + (ieee80211_is_probe_req(fc) || ieee80211_is_auth(fc))) { + if (status_acked) { + ifmgd->auth_data->timeout = + jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; + run_again(ifmgd, 
ifmgd->auth_data->timeout); + } else { + ifmgd->auth_data->timeout = jiffies - 1; + } + ifmgd->auth_data->timeout_started = true; + } else if (ifmgd->assoc_data && + (ieee80211_is_assoc_req(fc) || + ieee80211_is_reassoc_req(fc))) { + if (status_acked) { + ifmgd->assoc_data->timeout = + jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT; + run_again(ifmgd, ifmgd->assoc_data->timeout); + } else { + ifmgd->assoc_data->timeout = jiffies - 1; + } + ifmgd->assoc_data->timeout_started = true; + } + } + + if (ifmgd->auth_data && ifmgd->auth_data->timeout_started && time_after(jiffies, ifmgd->auth_data->timeout)) { if (ifmgd->auth_data->done) { /* @@ -2950,12 +3362,13 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) cfg80211_send_auth_timeout(sdata->dev, bssid); mutex_lock(&ifmgd->mtx); } - } else if (ifmgd->auth_data) + } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) run_again(ifmgd, ifmgd->auth_data->timeout); - if (ifmgd->assoc_data && + if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started && time_after(jiffies, ifmgd->assoc_data->timeout)) { - if (!ifmgd->assoc_data->have_beacon || + if ((ifmgd->assoc_data->need_beacon && + !ifmgd->assoc_data->have_beacon) || ieee80211_do_assoc(sdata)) { u8 bssid[ETH_ALEN]; @@ -2967,7 +3380,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) cfg80211_send_assoc_timeout(sdata->dev, bssid); mutex_lock(&ifmgd->mtx); } - } else if (ifmgd->assoc_data) + } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) run_again(ifmgd, ifmgd->assoc_data->timeout); if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | @@ -2998,7 +3411,8 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "No ack for nullfunc frame to AP %pM, disconnecting.\n", bssid); ieee80211_sta_connection_lost(sdata, bssid, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, + false); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) run_again(ifmgd, ifmgd->probe_timeout); @@ 
-3007,7 +3421,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } else if (ifmgd->probe_send_count < max_tries) { mlme_dbg(sdata, "No probe response from AP %pM after %dms, try %d/%i\n", @@ -3026,15 +3440,11 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } } mutex_unlock(&ifmgd->mtx); - - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } static void ieee80211_sta_bcn_mon_timer(unsigned long data) @@ -3046,6 +3456,7 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) if (local->quiescing) return; + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_connection_loss_work); } @@ -3095,6 +3506,14 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; /* + * Stop timers before deleting work items, as timers + * could race and re-add the work-items. They will be + * re-established on connection. + */ + del_timer_sync(&ifmgd->conn_mon_timer); + del_timer_sync(&ifmgd->bcn_mon_timer); + + /* * we need to use atomic bitops for the running bits * only because both timers might fire at the same * time -- the code here is properly synchronised. 
@@ -3108,36 +3527,32 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); - cancel_work_sync(&ifmgd->chswitch_work); if (del_timer_sync(&ifmgd->chswitch_timer)) set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running); - - /* these will just be re-established on connection */ - del_timer_sync(&ifmgd->conn_mon_timer); - del_timer_sync(&ifmgd->bcn_mon_timer); + cancel_work_sync(&ifmgd->chswitch_work); } void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (!ifmgd->associated) + mutex_lock(&ifmgd->mtx); + if (!ifmgd->associated) { + mutex_unlock(&ifmgd->mtx); return; + } if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; - mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) { - mlme_dbg(sdata, - "driver requested disconnect after resume\n"); - ieee80211_sta_connection_lost(sdata, - ifmgd->associated->bssid, - WLAN_REASON_UNSPECIFIED); - mutex_unlock(&ifmgd->mtx); - return; - } + mlme_dbg(sdata, "driver requested disconnect after resume\n"); + ieee80211_sta_connection_lost(sdata, + ifmgd->associated->bssid, + WLAN_REASON_UNSPECIFIED, + true); mutex_unlock(&ifmgd->mtx); + return; } + mutex_unlock(&ifmgd->mtx); if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running)) add_timer(&ifmgd->timer); @@ -3193,8 +3608,10 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) /* Restart STA timers */ rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - ieee80211_restart_sta_timer(sdata); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + ieee80211_restart_sta_timer(sdata); + } rcu_read_unlock(); } @@ -3213,201 +3630,6 @@ int ieee80211_max_network_latency(struct notifier_block *nb, return 0; } -static u32 chandef_downgrade(struct cfg80211_chan_def *c) -{ - u32 
ret; - int tmp; - - switch (c->width) { - case NL80211_CHAN_WIDTH_20: - c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_40: - c->width = NL80211_CHAN_WIDTH_20; - c->center_freq1 = c->chan->center_freq; - ret = IEEE80211_STA_DISABLE_40MHZ | - IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_80: - tmp = (30 + c->chan->center_freq - c->center_freq1)/20; - /* n_P40 */ - tmp /= 2; - /* freq_P40 */ - c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; - c->width = NL80211_CHAN_WIDTH_40; - ret = IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_80P80: - c->center_freq2 = 0; - c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_STA_DISABLE_80P80MHZ | - IEEE80211_STA_DISABLE_160MHZ; - break; - case NL80211_CHAN_WIDTH_160: - /* n_P20 */ - tmp = (70 + c->chan->center_freq - c->center_freq1)/20; - /* n_P80 */ - tmp /= 4; - c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; - c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_STA_DISABLE_80P80MHZ | - IEEE80211_STA_DISABLE_160MHZ; - break; - default: - case NL80211_CHAN_WIDTH_20_NOHT: - WARN_ON_ONCE(1); - c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - break; - } - - WARN_ON_ONCE(!cfg80211_chandef_valid(c)); - - return ret; -} - -static u32 -ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_channel *channel, - const struct ieee80211_ht_operation *ht_oper, - const struct ieee80211_vht_operation *vht_oper, - struct cfg80211_chan_def *chandef) -{ - struct cfg80211_chan_def vht_chandef; - u32 ht_cfreq, ret; - - chandef->chan = channel; - chandef->width = NL80211_CHAN_WIDTH_20_NOHT; - chandef->center_freq1 = channel->center_freq; - chandef->center_freq2 = 0; - - if (!ht_oper || !sband->ht_cap.ht_supported) { - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; - } - - 
chandef->width = NL80211_CHAN_WIDTH_20; - - ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, - channel->band); - /* check that channel matches the right operating channel */ - if (channel->center_freq != ht_cfreq) { - /* - * It's possible that some APs are confused here; - * Netgear WNDR3700 sometimes reports 4 higher than - * the actual channel in association responses, but - * since we look at probe response/beacon data here - * it should be OK. - */ - sdata_info(sdata, - "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", - channel->center_freq, ht_cfreq, - ht_oper->primary_chan, channel->band); - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; - } - - /* check 40 MHz support, if we have it */ - if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { - switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { - case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: - chandef->width = NL80211_CHAN_WIDTH_40; - chandef->center_freq1 += 10; - break; - case IEEE80211_HT_PARAM_CHA_SEC_BELOW: - chandef->width = NL80211_CHAN_WIDTH_40; - chandef->center_freq1 -= 10; - break; - } - } else { - /* 40 MHz (and 80 MHz) must be supported for VHT */ - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - if (!vht_oper || !sband->vht_cap.vht_supported) { - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - vht_chandef.chan = channel; - vht_chandef.center_freq1 = - ieee80211_channel_to_frequency(vht_oper->center_freq_seg1_idx, - channel->band); - vht_chandef.center_freq2 = 0; - - if (vht_oper->center_freq_seg2_idx) - vht_chandef.center_freq2 = - ieee80211_channel_to_frequency( - vht_oper->center_freq_seg2_idx, - channel->band); - - switch (vht_oper->chan_width) { - case IEEE80211_VHT_CHANWIDTH_USE_HT: - vht_chandef.width = chandef->width; - break; - case IEEE80211_VHT_CHANWIDTH_80MHZ: - vht_chandef.width = NL80211_CHAN_WIDTH_80; - break; - case IEEE80211_VHT_CHANWIDTH_160MHZ: - vht_chandef.width 
= NL80211_CHAN_WIDTH_160; - break; - case IEEE80211_VHT_CHANWIDTH_80P80MHZ: - vht_chandef.width = NL80211_CHAN_WIDTH_80P80; - break; - default: - sdata_info(sdata, - "AP VHT operation IE has invalid channel width (%d), disable VHT\n", - vht_oper->chan_width); - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - if (!cfg80211_chandef_valid(&vht_chandef)) { - sdata_info(sdata, - "AP VHT information is invalid, disable VHT\n"); - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - if (cfg80211_chandef_identical(chandef, &vht_chandef)) { - ret = 0; - goto out; - } - - if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { - sdata_info(sdata, - "AP VHT information doesn't match HT, disable VHT\n"); - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - *chandef = vht_chandef; - - ret = 0; - - while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - IEEE80211_CHAN_DISABLED)) { - if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { - ret = IEEE80211_STA_DISABLE_HT | - IEEE80211_STA_DISABLE_VHT; - goto out; - } - - ret = chandef_downgrade(chandef); - } - - if (chandef->width != vht_chandef.width) - sdata_info(sdata, - "local regulatory prevented using AP HT/VHT configuration, downgraded\n"); - -out: - WARN_ON_ONCE(!cfg80211_chandef_valid(chandef)); - return ret; -} - static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss) { @@ -3473,16 +3695,22 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && sband->ht_cap.ht_supported) { - const u8 *ht_oper_ie; + const u8 *ht_oper_ie, *ht_cap; ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION); if (ht_oper_ie && ht_oper_ie[1] >= sizeof(*ht_oper)) ht_oper = (void *)(ht_oper_ie + 2); + + ht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_CAPABILITY); + if (!ht_cap || ht_cap[1] < sizeof(struct ieee80211_ht_cap)) { + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + ht_oper = NULL; + } } if 
(!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && sband->vht_cap.vht_supported) { - const u8 *vht_oper_ie; + const u8 *vht_oper_ie, *vht_cap; vht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_OPERATION); @@ -3492,15 +3720,21 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, vht_oper = NULL; sdata_info(sdata, "AP advertised VHT without HT, disabling both\n"); - sdata->flags |= IEEE80211_STA_DISABLE_HT; - sdata->flags |= IEEE80211_STA_DISABLE_VHT; + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + } + + vht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_CAPABILITY); + if (!vht_cap || vht_cap[1] < sizeof(struct ieee80211_vht_cap)) { + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + vht_oper = NULL; } } ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, ht_oper, vht_oper, - &chandef); + &chandef, true); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), local->rx_chains); @@ -3517,8 +3751,11 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, */ ret = ieee80211_vif_use_channel(sdata, &chandef, IEEE80211_CHANCTX_SHARED); - while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) + while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { ifmgd->flags |= chandef_downgrade(&chandef); + ret = ieee80211_vif_use_channel(sdata, &chandef, + IEEE80211_CHANCTX_SHARED); + } return ret; } @@ -3547,15 +3784,12 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return -ENOMEM; } - mutex_lock(&local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&local->mtx); - if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; struct ieee80211_supported_band *sband; + const struct cfg80211_bss_ies *ies; sband = local->hw.wiphy->bands[cbss->channel->band]; @@ -3599,8 +3833,34 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, /* set 
timing information */ sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; - sdata->vif.bss_conf.sync_tsf = cbss->tsf; - sdata->vif.bss_conf.sync_device_ts = bss->device_ts; + rcu_read_lock(); + ies = rcu_dereference(cbss->beacon_ies); + if (ies) { + const u8 *tim_ie; + + sdata->vif.bss_conf.sync_tsf = ies->tsf; + sdata->vif.bss_conf.sync_device_ts = + bss->device_ts_beacon; + tim_ie = cfg80211_find_ie(WLAN_EID_TIM, + ies->data, ies->len); + if (tim_ie && tim_ie[1] >= 2) + sdata->vif.bss_conf.sync_dtim_count = tim_ie[2]; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } else if (!(local->hw.flags & + IEEE80211_HW_TIMING_BEACON_ONLY)) { + ies = rcu_dereference(cbss->proberesp_ies); + /* must be non-NULL since beacon IEs were NULL */ + sdata->vif.bss_conf.sync_tsf = ies->tsf; + sdata->vif.bss_conf.sync_device_ts = + bss->device_ts_presp; + sdata->vif.bss_conf.sync_dtim_count = 0; + } else { + sdata->vif.bss_conf.sync_tsf = 0; + sdata->vif.bss_conf.sync_device_ts = 0; + sdata->vif.bss_conf.sync_dtim_count = 0; + } + rcu_read_unlock(); /* tell driver about BSSID, basic rates and timing */ ieee80211_bss_info_change_notify(sdata, @@ -3704,8 +3964,16 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* prep auth_data so we don't go into idle on disassoc */ ifmgd->auth_data = auth_data; - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -3720,7 +3988,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, } /* hold our own reference */ - cfg80211_ref_bss(auth_data->bss); + cfg80211_ref_bss(local->hw.wiphy, auth_data->bss); err = 0; goto out_unlock; @@ -3743,8 +4011,9 @@ int ieee80211_mgd_assoc(struct 
ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)req->bss->priv; struct ieee80211_mgd_assoc_data *assoc_data; + const struct cfg80211_bss_ies *beacon_ies; struct ieee80211_supported_band *sband; - const u8 *ssidie, *ht_ie; + const u8 *ssidie, *ht_ie, *vht_ie; int i, err; assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL); @@ -3764,8 +4033,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } if (ifmgd->auth_data && !ifmgd->auth_data->done) { err = -EBUSY; @@ -3863,6 +4140,12 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param; else ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY); + if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap)) + memcpy(&assoc_data->ap_vht_cap, vht_ie + 2, + sizeof(struct ieee80211_vht_cap)); + else + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; rcu_read_unlock(); if (bss->wmm_used && bss->uapsd_supported && @@ -3896,13 +4179,17 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, /* kick off associate process */ ifmgd->assoc_data = assoc_data; + ifmgd->dtim_period = 0; err = ieee80211_prep_connection(sdata, req->bss, true); if (err) goto err_clear; - if (!bss->dtim_period && - sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) { + rcu_read_lock(); + beacon_ies = rcu_dereference(req->bss->beacon_ies); + + if (sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC && + !beacon_ies) { /* * Wait up to one beacon interval ... 
* should this be more if we miss one? @@ -3910,11 +4197,36 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "waiting for beacon from %pM\n", ifmgd->bssid); assoc_data->timeout = TU_TO_EXP_TIME(req->bss->beacon_interval); - } else { + assoc_data->timeout_started = true; + assoc_data->need_beacon = true; + } else if (beacon_ies) { + const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, + beacon_ies->data, + beacon_ies->len); + u8 dtim_count = 0; + + if (tim_ie && tim_ie[1] >= sizeof(struct ieee80211_tim_ie)) { + const struct ieee80211_tim_ie *tim; + tim = (void *)(tim_ie + 2); + ifmgd->dtim_period = tim->dtim_period; + dtim_count = tim->dtim_count; + } assoc_data->have_beacon = true; - assoc_data->sent_assoc = false; assoc_data->timeout = jiffies; + assoc_data->timeout_started = true; + + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = beacon_ies->tsf; + sdata->vif.bss_conf.sync_device_ts = + bss->device_ts_beacon; + sdata->vif.bss_conf.sync_dtim_count = dtim_count; + } + } else { + assoc_data->timeout = jiffies; + assoc_data->timeout_started = true; } + rcu_read_unlock(); + run_again(ifmgd, assoc_data->timeout); if (bss->corrupt_data) { @@ -3981,10 +4293,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); out: - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - if (sent_frame) __cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); @@ -4025,10 +4333,6 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, __cfg80211_send_disassoc(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - return 0; } @@ -4036,6 +4340,17 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + /* + * Make sure some work 
items will not run after this, + * they will not do anything but might not have been + * cancelled when disconnecting. + */ + cancel_work_sync(&ifmgd->monitor_work); + cancel_work_sync(&ifmgd->beacon_connection_loss_work); + cancel_work_sync(&ifmgd->request_smps_work); + cancel_work_sync(&ifmgd->csa_connection_drop_work); + cancel_work_sync(&ifmgd->chswitch_work); + mutex_lock(&ifmgd->mtx); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, false); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index a5379ae..430bd25 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -102,8 +102,7 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) ieee80211_sta_reset_conn_monitor(sdata); } -void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, - bool offchannel_ps_enable) +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; @@ -114,6 +113,15 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, * notify the AP about us leaving the channel and stop all * STA interfaces. */ + + /* + * Stop queues and transmit all frames queued by the driver + * before sending nullfunc to enable powersave at the AP. + */ + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); + drv_flush(local, false); + mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) @@ -126,25 +134,22 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); /* Check to see if we should disable beaconing. 
*/ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + if (sdata->vif.bss_conf.enable_beacon) { + set_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, + &sdata->state); + sdata->vif.bss_conf.enable_beacon = false; ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); - - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { - netif_tx_stop_all_queues(sdata->dev); - if (offchannel_ps_enable && - (sdata->vif.type == NL80211_IFTYPE_STATION) && - sdata->u.mgd.associated) - ieee80211_offchannel_ps_enable(sdata); } + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata); } mutex_unlock(&local->iflist_mtx); } -void ieee80211_offchannel_return(struct ieee80211_local *local, - bool offchannel_ps_disable) +void ieee80211_offchannel_return(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; @@ -163,33 +168,21 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, continue; /* Tell AP we're back */ - if (offchannel_ps_disable && - sdata->vif.type == NL80211_IFTYPE_STATION) { - if (sdata->u.mgd.associated) - ieee80211_offchannel_ps_disable(sdata); - } + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_disable(sdata); - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { - /* - * This may wake up queues even though the driver - * currently has them stopped. This is not very - * likely, since the driver won't have gotten any - * (or hardly any) new packets while we weren't - * on the right channel, and even if it happens - * it will at most lead to queueing up one more - * packet per queue in mac80211 rather than on - * the interface qdisc. 
- */ - netif_tx_wake_all_queues(sdata->dev); - } - - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + if (test_and_clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, + &sdata->state)) { + sdata->vif.bss_conf.enable_beacon = true; ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); + } } mutex_unlock(&local->iflist_mtx); + + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); } void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) @@ -304,10 +297,13 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) } } -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) { struct ieee80211_roc_work *dep, *tmp; + if (WARN_ON(roc->to_be_freed)) + return; + /* was never transmitted */ if (roc->frame) { cfg80211_mgmt_tx_status(&roc->sdata->wdev, @@ -323,9 +319,12 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) GFP_KERNEL); list_for_each_entry_safe(dep, tmp, &roc->dependents, list) - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); - kfree(roc); + if (free) + kfree(roc); + else + roc->to_be_freed = true; } void ieee80211_sw_roc_work(struct work_struct *work) @@ -338,6 +337,9 @@ void ieee80211_sw_roc_work(struct work_struct *work) mutex_lock(&local->mtx); + if (roc->to_be_freed) + goto out_unlock; + if (roc->abort) goto finish; @@ -377,7 +379,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, !roc->abort); if (started) { drv_flush(local, false); @@ -385,7 +387,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) local->tmp_channel = NULL; ieee80211_hw_config(local, 0); - ieee80211_offchannel_return(local, true); + ieee80211_offchannel_return(local); 
} ieee80211_recalc_idle(local); @@ -417,7 +419,7 @@ static void ieee80211_hw_roc_done(struct work_struct *work) list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); /* if there's another roc, start it now */ ieee80211_start_next_roc(local); @@ -467,12 +469,14 @@ void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) list_for_each_entry_safe(roc, tmp, &tmp_list, list) { if (local->ops->remain_on_channel) { list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); } else { ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); /* work will clean up etc */ flush_delayed_work(&roc->work); + WARN_ON(!roc->to_be_freed); + kfree(roc); } } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 79a48f3..d0275f3 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -7,25 +7,23 @@ #include "led.h" /* return value indicates whether the driver should be further notified */ -static bool ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) +static void ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) { switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_quiesce(sdata); - return true; + break; case NL80211_IFTYPE_ADHOC: ieee80211_ibss_quiesce(sdata); - return true; + break; case NL80211_IFTYPE_MESH_POINT: ieee80211_mesh_quiesce(sdata); - return true; - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_MONITOR: - /* don't tell driver about this */ - return false; + break; default: - return true; + break; } + + cancel_work_sync(&sdata->work); } int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) @@ -40,11 +38,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) ieee80211_scan_cancel(local); + ieee80211_dfs_cac_cancel(local); + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { set_sta_flag(sta, 
WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions( + sta, AGG_STOP_LOCAL_REQUEST); } mutex_unlock(&local->sta_mtx); } @@ -94,10 +95,9 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(err != 1); local->wowlan = false; } else { - list_for_each_entry(sdata, &local->interfaces, list) { - cancel_work_sync(&sdata->work); - ieee80211_quiesce(sdata); - } + list_for_each_entry(sdata, &local->interfaces, list) + if (ieee80211_sdata_running(sdata)) + ieee80211_quiesce(sdata); goto suspend; } } @@ -124,17 +124,43 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { - cancel_work_sync(&sdata->work); + static u8 zero_addr[ETH_ALEN] = {}; + u32 changed = 0; - if (!ieee80211_quiesce(sdata)) + if (!ieee80211_sdata_running(sdata)) continue; - if (!ieee80211_sdata_running(sdata)) + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: + /* skip these */ continue; + case NL80211_IFTYPE_STATION: + if (sdata->vif.bss_conf.assoc) + changed = BSS_CHANGED_ASSOC | + BSS_CHANGED_BSSID | + BSS_CHANGED_IDLE; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + if (sdata->vif.bss_conf.enable_beacon) + changed = BSS_CHANGED_BEACON_ENABLED; + break; + default: + break; + } - /* disable beaconing */ - ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_BEACON_ENABLED); + ieee80211_quiesce(sdata); + + sdata->suspend_bss_conf = sdata->vif.bss_conf; + memset(&sdata->vif.bss_conf, 0, sizeof(sdata->vif.bss_conf)); + sdata->vif.bss_conf.idle = true; + if (sdata->suspend_bss_conf.bssid) + sdata->vif.bss_conf.bssid = zero_addr; + + /* disable beaconing or remove association */ + ieee80211_bss_info_change_notify(sdata, changed); if (sdata->vif.type == NL80211_IFTYPE_AP && rcu_access_pointer(sdata->u.ap.beacon)) 
@@ -204,3 +230,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) * ieee80211_reconfig(), which is also needed for hardware * hang/firmware failure/etc. recovery. */ + +void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + cfg80211_report_wowlan_wakeup(&sdata->wdev, wakeup, gfp); +} +EXPORT_SYMBOL(ieee80211_report_wowlan_wakeup); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 301386d..d35a5dd 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -68,6 +68,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; rcu_read_unlock(); + ieee80211_sta_set_rx_nss(sta); + ref->ops->rate_init(ref->priv, sband, ista, priv_sta); set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 8c5acdc..eea45a2 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -494,6 +494,33 @@ minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) kfree(mi); } +static void +minstrel_init_cck_rates(struct minstrel_priv *mp) +{ + static const int bitrates[4] = { 10, 20, 55, 110 }; + struct ieee80211_supported_band *sband; + int i, j; + + sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ]; + if (!sband) + return; + + for (i = 0, j = 0; i < sband->n_bitrates; i++) { + struct ieee80211_rate *rate = &sband->bitrates[i]; + + if (rate->flags & IEEE80211_RATE_ERP_G) + continue; + + for (j = 0; j < ARRAY_SIZE(bitrates); j++) { + if (rate->bitrate != bitrates[j]) + continue; + + mp->cck_rates[j] = i; + break; + } + } +} + static void * minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) { @@ -539,6 +566,8 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) S_IRUGO | S_IWUGO, debugfsdir, 
&mp->fixed_rate_idx); #endif + minstrel_init_cck_rates(mp); + return mp; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 5d278ec..5ecf757 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -79,6 +79,8 @@ struct minstrel_priv { unsigned int lookaround_rate; unsigned int lookaround_rate_mrr; + u8 cck_rates[4]; + #ifdef CONFIG_MAC80211_DEBUGFS /* * enable fixed rate processing per RC diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 9f9c453..3af141c 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> + * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -63,6 +63,30 @@ } \ } +#define CCK_DURATION(_bitrate, _short, _len) \ + (10 /* SIFS */ + \ + (_short ? 72 + 24 : 144 + 48 ) + \ + (8 * (_len + 4) * 10) / (_bitrate)) + +#define CCK_ACK_DURATION(_bitrate, _short) \ + (CCK_DURATION((_bitrate > 10 ? 
20 : 10), false, 60) + \ + CCK_DURATION(_bitrate, _short, AVG_PKT_SIZE)) + +#define CCK_DURATION_LIST(_short) \ + CCK_ACK_DURATION(10, _short), \ + CCK_ACK_DURATION(20, _short), \ + CCK_ACK_DURATION(55, _short), \ + CCK_ACK_DURATION(110, _short) + +#define CCK_GROUP \ + [MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS] = { \ + .streams = 0, \ + .duration = { \ + CCK_DURATION_LIST(false), \ + CCK_DURATION_LIST(true) \ + } \ + } + /* * To enable sufficiently targeted rate sampling, MCS rates are divided into * groups, based on the number of streams and flags (HT40, SGI) that they @@ -95,8 +119,13 @@ const struct mcs_group minstrel_mcs_groups[] = { #if MINSTREL_MAX_STREAMS >= 3 MCS_GROUP(3, 1, 1), #endif + + /* must be last */ + CCK_GROUP }; +#define MINSTREL_CCK_GROUP (ARRAY_SIZE(minstrel_mcs_groups) - 1) + static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES]; /* @@ -119,6 +148,29 @@ minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } +static struct minstrel_rate_stats * +minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_tx_rate *rate) +{ + int group, idx; + + if (rate->flags & IEEE80211_TX_RC_MCS) { + group = minstrel_ht_get_group_idx(rate); + idx = rate->idx % MCS_GROUP_RATES; + } else { + group = MINSTREL_CCK_GROUP; + + for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) + if (rate->idx == mp->cck_rates[idx]) + break; + + /* short preamble */ + if (!(mi->groups[group].supported & BIT(idx))) + idx += 4; + } + return &mi->groups[group].rates[idx]; +} + static inline struct minstrel_rate_stats * minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) { @@ -159,7 +211,7 @@ static void minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) { struct minstrel_rate_stats *mr; - unsigned int usecs; + unsigned int usecs = 0; mr = &mi->groups[group].rates[rate]; @@ -168,7 +220,9 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) 
return; } - usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + if (group != MINSTREL_CCK_GROUP) + usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + usecs += minstrel_mcs_groups[group].duration[rate]; mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability); } @@ -231,10 +285,6 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mr->cur_tp) continue; - /* ignore the lowest rate of each single-stream group */ - if (!i && minstrel_mcs_groups[group].streams == 1) - continue; - if ((mr->cur_tp > cur_prob_tp && mr->probability > MINSTREL_FRAC(3, 4)) || mr->probability > cur_prob) { mg->max_prob_rate = index; @@ -297,7 +347,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } static bool -minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) +minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rate) { if (rate->idx < 0) return false; @@ -305,7 +355,13 @@ minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) if (!rate->count) return false; - return !!(rate->flags & IEEE80211_TX_RC_MCS); + if (rate->flags & IEEE80211_TX_RC_MCS) + return true; + + return rate->idx == mp->cck_rates[0] || + rate->idx == mp->cck_rates[1] || + rate->idx == mp->cck_rates[2] || + rate->idx == mp->cck_rates[3]; } static void @@ -390,7 +446,6 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, struct minstrel_rate_stats *rate, *rate2; struct minstrel_priv *mp = priv; bool last; - int group; int i; if (!msp->is_ht) @@ -419,13 +474,12 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; - last = !minstrel_ht_txstat_valid(&ar[0]); + last = !minstrel_ht_txstat_valid(mp, &ar[0]); for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || - !minstrel_ht_txstat_valid(&ar[i + 1]); + !minstrel_ht_txstat_valid(mp, &ar[i + 1]); - 
group = minstrel_ht_get_group_idx(&ar[i]); - rate = &mi->groups[group].rates[ar[i].idx % 8]; + rate = minstrel_ht_get_stats(mp, mi, &ar[i]); if (last) rate->success += info->status.ampdu_ack_len; @@ -451,7 +505,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { minstrel_ht_update_stats(mp, mi); - if (!(info->flags & IEEE80211_TX_CTL_AMPDU)) + if (!(info->flags & IEEE80211_TX_CTL_AMPDU) && + mi->max_prob_rate / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) minstrel_aggr_check(sta, skb); } } @@ -467,6 +522,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, unsigned int ctime = 0; unsigned int t_slot = 9; /* FIXME */ unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); + unsigned int overhead = 0, overhead_rtscts = 0; mr = minstrel_get_ratestats(mi, index); if (mr->probability < MINSTREL_FRAC(1, 10)) { @@ -488,9 +544,14 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, ctime += (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); + if (index / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) { + overhead = mi->overhead; + overhead_rtscts = mi->overhead_rtscts; + } + /* Total TX time for data and Contention after first 2 tries */ - tx_time = ctime + 2 * (mi->overhead + tx_time_data); - tx_time_rtscts = ctime + 2 * (mi->overhead_rtscts + tx_time_data); + tx_time = ctime + 2 * (overhead + tx_time_data); + tx_time_rtscts = ctime + 2 * (overhead_rtscts + tx_time_data); /* See how many more tries we can fit inside segment size */ do { @@ -499,8 +560,8 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, cw = min((cw << 1) | 1, mp->cw_max); /* Total TX time after this try */ - tx_time += ctime + mi->overhead + tx_time_data; - tx_time_rtscts += ctime + mi->overhead_rtscts + tx_time_data; + tx_time += ctime + overhead + tx_time_data; + tx_time_rtscts += ctime + overhead_rtscts + 
tx_time_data; if (tx_time_rtscts < mp->segment_size) mr->retry_count_rtscts++; @@ -530,9 +591,16 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, else rate->count = mr->retry_count; - rate->flags = IEEE80211_TX_RC_MCS | group->flags; + rate->flags = 0; if (rtscts) rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; + + if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + rate->idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; + return; + } + + rate->flags |= IEEE80211_TX_RC_MCS | group->flags; rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; } @@ -596,6 +664,22 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } static void +minstrel_ht_check_cck_shortpreamble(struct minstrel_priv *mp, + struct minstrel_ht_sta *mi, bool val) +{ + u8 supported = mi->groups[MINSTREL_CCK_GROUP].supported; + + if (!supported || !mi->cck_supported_short) + return; + + if (supported & (mi->cck_supported_short << (val * 4))) + return; + + supported ^= mi->cck_supported_short | (mi->cck_supported_short << 4); + mi->groups[MINSTREL_CCK_GROUP].supported = supported; +} + +static void minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) { @@ -614,6 +698,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); info->flags |= mi->tx_flags; + minstrel_ht_check_cck_shortpreamble(mp, mi, txrc->short_preamble); /* Don't use EAPOL frames for sampling on non-mrr hw */ if (mp->hw->max_rates == 1 && @@ -687,6 +772,30 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, } static void +minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta) +{ + int i; + + if (sband->band != IEEE80211_BAND_2GHZ) + return; + + mi->cck_supported = 0; + 
mi->cck_supported_short = 0; + for (i = 0; i < 4; i++) { + if (!rate_supported(sta, sband->band, mp->cck_rates[i])) + continue; + + mi->cck_supported |= BIT(i); + if (sband->bitrates[i].flags & IEEE80211_RATE_SHORT_PREAMBLE) + mi->cck_supported_short |= BIT(i); + } + + mi->groups[MINSTREL_CCK_GROUP].supported = mi->cck_supported; +} + +static void minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta) { @@ -699,14 +808,13 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, int ack_dur; int stbc; int i; - unsigned int smps; /* fall back to the old minstrel for legacy stations */ if (!sta->ht_cap.ht_supported) goto use_legacy; BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != - MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); + MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS + 1); msp->is_ht = true; memset(mi, 0, sizeof(*mi)); @@ -735,28 +843,29 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING) mi->tx_flags |= IEEE80211_TX_CTL_LDPC; - smps = (sta_cap & IEEE80211_HT_CAP_SM_PS) >> - IEEE80211_HT_CAP_SM_PS_SHIFT; - for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { - u16 req = 0; - mi->groups[i].supported = 0; - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - req |= IEEE80211_HT_CAP_SGI_40; - else - req |= IEEE80211_HT_CAP_SGI_20; + if (i == MINSTREL_CCK_GROUP) { + minstrel_ht_update_cck(mp, mi, sband, sta); + continue; } - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - req |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) { + if (!(sta_cap & IEEE80211_HT_CAP_SGI_40)) + continue; + } else { + if (!(sta_cap & IEEE80211_HT_CAP_SGI_20)) + continue; + } + } - if ((sta_cap & req) != req) + if 
(minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH && + sta->bandwidth < IEEE80211_STA_RX_BW_40) continue; /* Mark MCS > 7 as unsupported if STA is in static SMPS mode */ - if (smps == WLAN_HT_CAP_SM_PS_STATIC && + if (sta->smps_mode == IEEE80211_SMPS_STATIC && minstrel_mcs_groups[i].streams > 1) continue; diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 462d2b2..302dbd5 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -107,8 +107,11 @@ struct minstrel_ht_sta { /* current MCS group to be sampled */ u8 sample_group; + u8 cck_supported; + u8 cck_supported_short; + /* MCS rate group info and statistics */ - struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS]; + struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS + 1]; }; struct minstrel_ht_sta_priv { diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index e788f76..df44a5a 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -15,13 +15,76 @@ #include "rc80211_minstrel.h" #include "rc80211_minstrel_ht.h" +static char * +minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) +{ + unsigned int max_mcs = MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; + const struct mcs_group *mg; + unsigned int j, tp, prob, eprob; + char htmode = '2'; + char gimode = 'L'; + + if (!mi->groups[i].supported) + return p; + + mg = &minstrel_mcs_groups[i]; + if (mg->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + htmode = '4'; + if (mg->flags & IEEE80211_TX_RC_SHORT_GI) + gimode = 'S'; + + for (j = 0; j < MCS_GROUP_RATES; j++) { + struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; + static const int bitrates[4] = { 10, 20, 55, 110 }; + int idx = i * MCS_GROUP_RATES + j; + + if (!(mi->groups[i].supported & BIT(j))) + continue; + + if (i == max_mcs) + p += sprintf(p, "CCK/%cP 
", j < 4 ? 'L' : 'S'); + else + p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); + + *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; + *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' '; + + if (i == max_mcs) { + int r = bitrates[j % 4]; + p += sprintf(p, " %2u.%1uM", r / 10, r % 10); + } else { + p += sprintf(p, " MCS%-2u", (mg->streams - 1) * + MCS_GROUP_RATES + j); + } + + tp = mr->cur_tp / 10; + prob = MINSTREL_TRUNC(mr->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mr->probability * 1000); + + p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " + "%3u %3u(%3u) %8llu %8llu\n", + tp / 10, tp % 10, + eprob / 10, eprob % 10, + prob / 10, prob % 10, + mr->retry_count, + mr->last_success, + mr->last_attempts, + (unsigned long long)mr->succ_hist, + (unsigned long long)mr->att_hist); + } + + return p; +} + static int minstrel_ht_stats_open(struct inode *inode, struct file *file) { struct minstrel_ht_sta_priv *msp = inode->i_private; struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_debugfs_info *ms; - unsigned int i, j, tp, prob, eprob; + unsigned int i; + unsigned int max_mcs = MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; char *p; int ret; @@ -38,50 +101,13 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) file->private_data = ms; p = ms->buf; - p += sprintf(p, "type rate throughput ewma prob this prob " - "this succ/attempt success attempts\n"); - for (i = 0; i < MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; i++) { - char htmode = '2'; - char gimode = 'L'; - - if (!mi->groups[i].supported) - continue; - - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - htmode = '4'; - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) - gimode = 'S'; + p += sprintf(p, "type rate throughput ewma prob this prob " + "retry this succ/attempt success attempts\n"); - for (j = 0; j < MCS_GROUP_RATES; j++) { - struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; - int idx = i * MCS_GROUP_RATES + j; + 
p = minstrel_ht_stats_dump(mi, max_mcs, p); + for (i = 0; i < max_mcs; i++) + p = minstrel_ht_stats_dump(mi, i, p); - if (!(mi->groups[i].supported & BIT(j))) - continue; - - p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); - - *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; - *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; - *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' '; - p += sprintf(p, "MCS%-2u", (minstrel_mcs_groups[i].streams - 1) * - MCS_GROUP_RATES + j); - - tp = mr->cur_tp / 10; - prob = MINSTREL_TRUNC(mr->cur_prob * 1000); - eprob = MINSTREL_TRUNC(mr->probability * 1000); - - p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - "%3u(%3u) %8llu %8llu\n", - tp / 10, tp % 10, - eprob / 10, eprob % 10, - prob / 10, prob % 10, - mr->last_success, - mr->last_attempts, - (unsigned long long)mr->succ_hist, - (unsigned long long)mr->att_hist); - } - } p += sprintf(p, "\nTotal packet count:: ideal %d " "lookaround %d\n", max(0, (int) mi->total_packets - (int) mi->sample_packets), diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 580704e..c6844ad 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -668,9 +668,9 @@ static inline u16 seq_sub(u16 sq1, u16 sq2) static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, - int index) + int index, + struct sk_buff_head *frames) { - struct ieee80211_local *local = sdata->local; struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; struct ieee80211_rx_status *status; @@ -684,7 +684,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, tid_agg_rx->reorder_buf[index] = NULL; status = IEEE80211_SKB_RXCB(skb); status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE; - skb_queue_tail(&local->rx_skb_queue, skb); + __skb_queue_tail(frames, skb); no_frame: tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); @@ -692,7 +692,8 @@ no_frame: static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx 
*tid_agg_rx, - u16 head_seq_num) + u16 head_seq_num, + struct sk_buff_head *frames) { int index; @@ -701,7 +702,8 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; - ieee80211_release_reorder_frame(sdata, tid_agg_rx, index); + ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, + frames); } } @@ -717,7 +719,8 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata #define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, - struct tid_ampdu_rx *tid_agg_rx) + struct tid_ampdu_rx *tid_agg_rx, + struct sk_buff_head *frames) { int index, j; @@ -746,7 +749,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, ht_dbg_ratelimited(sdata, "release an RX reorder frame due to timeout on earlier frames\n"); - ieee80211_release_reorder_frame(sdata, tid_agg_rx, j); + ieee80211_release_reorder_frame(sdata, tid_agg_rx, j, + frames); /* * Increment the head seq# also for the skipped slots. 
@@ -756,7 +760,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, skipped = 0; } } else while (tid_agg_rx->reorder_buf[index]) { - ieee80211_release_reorder_frame(sdata, tid_agg_rx, index); + ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, + frames); index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; } @@ -788,7 +793,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, */ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, - struct sk_buff *skb) + struct sk_buff *skb, + struct sk_buff_head *frames) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 sc = le16_to_cpu(hdr->seq_ctrl); @@ -816,7 +822,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); /* release stored frames up to new head to stack */ ieee80211_release_reorder_frames(sdata, tid_agg_rx, - head_seq_num); + head_seq_num, frames); } /* Now the new frame is always in the range of the reordering buffer */ @@ -846,7 +852,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata tid_agg_rx->reorder_buf[index] = skb; tid_agg_rx->reorder_time[index] = jiffies; tid_agg_rx->stored_mpdu_num++; - ieee80211_sta_reorder_release(sdata, tid_agg_rx); + ieee80211_sta_reorder_release(sdata, tid_agg_rx, frames); out: spin_unlock(&tid_agg_rx->reorder_lock); @@ -857,7 +863,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata * Reorder MPDUs from A-MPDUs, keeping them on a buffer. Returns * true if the MPDU was buffered, false if it should be processed. 
*/ -static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) +static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, + struct sk_buff_head *frames) { struct sk_buff *skb = rx->skb; struct ieee80211_local *local = rx->local; @@ -922,11 +929,12 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) * sure that we cannot get to it any more before doing * anything with it. */ - if (ieee80211_sta_manage_reorder_buf(rx->sdata, tid_agg_rx, skb)) + if (ieee80211_sta_manage_reorder_buf(rx->sdata, tid_agg_rx, skb, + frames)) return; dont_reorder: - skb_queue_tail(&local->rx_skb_queue, skb); + __skb_queue_tail(frames, skb); } static ieee80211_rx_result debug_noinline @@ -1452,6 +1460,10 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } } + /* mesh power save support */ + if (ieee80211_vif_is_mesh(&rx->sdata->vif)) + ieee80211_mps_rx_h_sta_process(sta, hdr); + /* * Drop (qos-)data::nullfunc frames silently, since they * are used only to control station power saving mode. 
@@ -2015,7 +2027,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) /* frame is in RMC, don't forward */ if (ieee80211_is_data(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && - mesh_rmc_check(hdr->addr3, mesh_hdr, rx->sdata)) + mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr)) return RX_DROP_MONITOR; if (!ieee80211_is_data(hdr->frame_control) || @@ -2042,9 +2054,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) } rcu_read_lock(); - mppath = mpp_path_lookup(proxied_addr, sdata); + mppath = mpp_path_lookup(sdata, proxied_addr); if (!mppath) { - mpp_path_add(proxied_addr, mpp_addr, sdata); + mpp_path_add(sdata, proxied_addr, mpp_addr); } else { spin_lock_bh(&mppath->state_lock); if (!ether_addr_equal(mppath->mpp, mpp_addr)) @@ -2090,12 +2102,15 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(fwd_hdr->addr1)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast); memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); - } else if (!mesh_nexthop_lookup(fwd_skb, sdata)) { + /* update power mode indication when forwarding */ + ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr); + } else if (!mesh_nexthop_lookup(sdata, fwd_skb)) { + /* mesh power mode flags updated in mesh_nexthop_lookup */ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast); } else { /* unable to resolve next hop */ - mesh_path_error_tx(ifmsh->mshcfg.element_ttl, fwd_hdr->addr3, - 0, reason, fwd_hdr->addr2, sdata); + mesh_path_error_tx(sdata, ifmsh->mshcfg.element_ttl, + fwd_hdr->addr3, 0, reason, fwd_hdr->addr2); IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_no_route); kfree_skb(fwd_skb); return RX_DROP_MONITOR; @@ -2177,7 +2192,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) } static ieee80211_rx_result debug_noinline -ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) +ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) { struct sk_buff *skb = rx->skb; struct ieee80211_bar *bar = (struct 
ieee80211_bar *)skb->data; @@ -2216,7 +2231,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) spin_lock(&tid_agg_rx->reorder_lock); /* release stored frames up to start of BAR */ ieee80211_release_reorder_frames(rx->sdata, tid_agg_rx, - start_seq_num); + start_seq_num, frames); spin_unlock(&tid_agg_rx->reorder_lock); kfree_skb(skb); @@ -2353,38 +2368,34 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_ADHOC) break; - /* verify action & smps_control are present */ + /* verify action & smps_control/chanwidth are present */ if (len < IEEE80211_MIN_ACTION_SIZE + 2) goto invalid; switch (mgmt->u.action.u.ht_smps.action) { case WLAN_HT_ACTION_SMPS: { struct ieee80211_supported_band *sband; - u8 smps; + enum ieee80211_smps_mode smps_mode; /* convert to HT capability */ switch (mgmt->u.action.u.ht_smps.smps_control) { case WLAN_HT_SMPS_CONTROL_DISABLED: - smps = WLAN_HT_CAP_SM_PS_DISABLED; + smps_mode = IEEE80211_SMPS_OFF; break; case WLAN_HT_SMPS_CONTROL_STATIC: - smps = WLAN_HT_CAP_SM_PS_STATIC; + smps_mode = IEEE80211_SMPS_STATIC; break; case WLAN_HT_SMPS_CONTROL_DYNAMIC: - smps = WLAN_HT_CAP_SM_PS_DYNAMIC; + smps_mode = IEEE80211_SMPS_DYNAMIC; break; default: goto invalid; } - smps <<= IEEE80211_HT_CAP_SM_PS_SHIFT; /* if no change do nothing */ - if ((rx->sta->sta.ht_cap.cap & - IEEE80211_HT_CAP_SM_PS) == smps) + if (rx->sta->sta.smps_mode == smps_mode) goto handled; - - rx->sta->sta.ht_cap.cap &= ~IEEE80211_HT_CAP_SM_PS; - rx->sta->sta.ht_cap.cap |= smps; + rx->sta->sta.smps_mode = smps_mode; sband = rx->local->hw.wiphy->bands[status->band]; @@ -2392,11 +2403,66 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) IEEE80211_RC_SMPS_CHANGED); goto handled; } + case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { + struct ieee80211_supported_band *sband; + u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; + enum ieee80211_sta_rx_bandwidth new_bw; + + /* If it doesn't support 40 MHz it can't change ... 
*/ + if (!(rx->sta->sta.ht_cap.cap & + IEEE80211_HT_CAP_SUP_WIDTH_20_40)) + goto handled; + + if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ) + new_bw = IEEE80211_STA_RX_BW_20; + else + new_bw = ieee80211_sta_cur_vht_bw(rx->sta); + + if (rx->sta->sta.bandwidth == new_bw) + goto handled; + + sband = rx->local->hw.wiphy->bands[status->band]; + + rate_control_rate_update(local, sband, rx->sta, + IEEE80211_RC_BW_CHANGED); + goto handled; + } default: goto invalid; } break; + case WLAN_CATEGORY_VHT: + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + break; + + /* verify action code is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + goto invalid; + + switch (mgmt->u.action.u.vht_opmode_notif.action_code) { + case WLAN_VHT_ACTION_OPMODE_NOTIF: { + u8 opmode; + + /* verify opmode is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 2) + goto invalid; + + opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; + + ieee80211_vht_handle_opmode(rx->sdata, rx->sta, + opmode, status->band, + false); + goto handled; + } + default: + break; + } + break; case WLAN_CATEGORY_BACK: if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && @@ -2609,7 +2675,19 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) memset(nskb->cb, 0, sizeof(nskb->cb)); - ieee80211_tx_skb(rx->sdata, nskb); + if (rx->sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(nskb); + + info->flags = IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK | + IEEE80211_TX_CTL_NO_CCK_RATE; + if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + info->hw_queue = + local->hw.offchannel_tx_hw_queue; + } + + __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, + status->band); } dev_kfree_skb(rx->skb); return RX_QUEUED; @@ -2648,8 
+2726,9 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; break; case cpu_to_le16(IEEE80211_STYPE_PROBE_REQ): - /* process only for ibss */ - if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + /* process only for ibss and mesh */ + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return RX_DROP_MONITOR; break; default: @@ -2772,7 +2851,8 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, } } -static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) +static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, + struct sk_buff_head *frames) { ieee80211_rx_result res = RX_DROP_MONITOR; struct sk_buff *skb; @@ -2784,15 +2864,9 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) goto rxh_next; \ } while (0); - spin_lock(&rx->local->rx_skb_queue.lock); - if (rx->local->running_rx_handler) - goto unlock; - - rx->local->running_rx_handler = true; - - while ((skb = __skb_dequeue(&rx->local->rx_skb_queue))) { - spin_unlock(&rx->local->rx_skb_queue.lock); + spin_lock_bh(&rx->local->rx_path_lock); + while ((skb = __skb_dequeue(frames))) { /* * all the other fields are valid across frames * that belong to an aMPDU since they are on the @@ -2813,7 +2887,12 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) #endif CALL_RXH(ieee80211_rx_h_amsdu) CALL_RXH(ieee80211_rx_h_data) - CALL_RXH(ieee80211_rx_h_ctrl); + + /* special treatment -- needs the queue */ + res = ieee80211_rx_h_ctrl(rx, frames); + if (res != RX_CONTINUE) + goto rxh_next; + CALL_RXH(ieee80211_rx_h_mgmt_check) CALL_RXH(ieee80211_rx_h_action) CALL_RXH(ieee80211_rx_h_userspace_mgmt) @@ -2822,20 +2901,20 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) rxh_next: ieee80211_rx_handlers_result(rx, res); - spin_lock(&rx->local->rx_skb_queue.lock); + #undef CALL_RXH } - rx->local->running_rx_handler = false; - - unlock: - spin_unlock(&rx->local->rx_skb_queue.lock); + 
spin_unlock_bh(&rx->local->rx_path_lock); } static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) { + struct sk_buff_head reorder_release; ieee80211_rx_result res = RX_DROP_MONITOR; + __skb_queue_head_init(&reorder_release); + #define CALL_RXH(rxh) \ do { \ res = rxh(rx); \ @@ -2845,9 +2924,9 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) CALL_RXH(ieee80211_rx_h_check) - ieee80211_rx_reorder_ampdu(rx); + ieee80211_rx_reorder_ampdu(rx, &reorder_release); - ieee80211_rx_handlers(rx); + ieee80211_rx_handlers(rx, &reorder_release); return; rxh_next: @@ -2862,6 +2941,7 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) */ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) { + struct sk_buff_head frames; struct ieee80211_rx_data rx = { .sta = sta, .sdata = sta->sdata, @@ -2877,11 +2957,13 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) if (!tid_agg_rx) return; + __skb_queue_head_init(&frames); + spin_lock(&tid_agg_rx->reorder_lock); - ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx); + ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames); spin_unlock(&tid_agg_rx->reorder_lock); - ieee80211_rx_handlers(&rx); + ieee80211_rx_handlers(&rx, &frames); } /* main receive path */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 8ed83dc..43a45cf 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -27,22 +27,15 @@ #define IEEE80211_PROBE_DELAY (HZ / 33) #define IEEE80211_CHANNEL_TIME (HZ / 33) -#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 8) - -static void ieee80211_rx_bss_free(struct cfg80211_bss *cbss) -{ - struct ieee80211_bss *bss = (void *)cbss->priv; - - kfree(bss_mesh_id(bss)); - kfree(bss_mesh_cfg(bss)); -} +#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 9) void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss) { if (!bss) return; - cfg80211_put_bss(container_of((void *)bss, struct cfg80211_bss, 
priv)); + cfg80211_put_bss(local->hw.wiphy, + container_of((void *)bss, struct cfg80211_bss, priv)); } static bool is_uapsd_supported(struct ieee802_11_elems *elems) @@ -65,12 +58,11 @@ static bool is_uapsd_supported(struct ieee802_11_elems *elems) struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, - struct ieee80211_mgmt *mgmt, - size_t len, + struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - struct ieee80211_channel *channel, - bool beacon) + struct ieee80211_channel *channel) { + bool beacon = ieee80211_is_beacon(mgmt->frame_control); struct cfg80211_bss *cbss; struct ieee80211_bss *bss; int clen, srlen; @@ -86,10 +78,12 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (!cbss) return NULL; - cbss->free_priv = ieee80211_rx_bss_free; bss = (void *)cbss->priv; - bss->device_ts = rx_status->device_timestamp; + if (beacon) + bss->device_ts_beacon = rx_status->device_timestamp; + else + bss->device_ts_presp = rx_status->device_timestamp; if (elems->parse_error) { if (beacon) @@ -113,18 +107,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->valid_data |= IEEE80211_BSS_VALID_ERP; } - if (elems->tim && (!elems->parse_error || - !(bss->valid_data & IEEE80211_BSS_VALID_DTIM))) { - struct ieee80211_tim_ie *tim_ie = elems->tim; - bss->dtim_period = tim_ie->dtim_period; - if (!elems->parse_error) - bss->valid_data |= IEEE80211_BSS_VALID_DTIM; - } - - /* If the beacon had no TIM IE, or it was invalid, use 1 */ - if (beacon && !bss->dtim_period) - bss->dtim_period = 1; - /* replace old supported rates if we get new values */ if (!elems->parse_error || !(bss->valid_data & IEEE80211_BSS_VALID_RATES)) { @@ -159,9 +141,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->valid_data |= IEEE80211_BSS_VALID_WMM; } - if (!beacon) - bss->last_probe_resp = jiffies; - return bss; } @@ -215,7 +194,7 @@ void ieee80211_scan_rx(struct ieee80211_local 
*local, struct sk_buff *skb) bss = ieee80211_bss_info_update(local, rx_status, mgmt, skb->len, &elems, - channel, beacon); + channel); if (bss) ieee80211_rx_bss_put(local, bss); } @@ -304,7 +283,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, if (!was_hw_scan) { ieee80211_configure_filter(local); drv_sw_scan_complete(local); - ieee80211_offchannel_return(local, true); + ieee80211_offchannel_return(local); } ieee80211_recalc_idle(local); @@ -353,7 +332,10 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; - ieee80211_offchannel_stop_vifs(local, true); + ieee80211_offchannel_stop_vifs(local); + + /* ensure nullfunc is transmitted before leaving operating channel */ + drv_flush(local, false); ieee80211_configure_filter(local); @@ -369,6 +351,9 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { + if (local->radar_detect_enabled) + return false; + if (!list_empty(&local->roc_list)) return false; @@ -403,6 +388,11 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, int i; struct ieee80211_sub_if_data *sdata; enum ieee80211_band band = local->hw.conf.channel->band; + u32 tx_flags; + + tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; + if (local->scan_req->no_cck) + tx_flags |= IEEE80211_TX_CTL_NO_CCK_RATE; sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->mtx)); @@ -414,8 +404,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, local->scan_req->ssids[i].ssid_len, local->scan_req->ie, local->scan_req->ie_len, local->scan_req->rates[band], false, - local->scan_req->no_cck, - local->hw.conf.channel, true); + tx_flags, local->hw.conf.channel, true); /* * After sending probe requests, wait for probe responses @@ -559,8 +548,6 @@ static void 
ieee80211_scan_state_decision(struct ieee80211_local *local, bool associated = false; bool tx_empty = true; bool bad_latency; - bool listen_int_exceeded; - unsigned long min_beacon_int = 0; struct ieee80211_sub_if_data *sdata; struct ieee80211_channel *next_chan; enum mac80211_scan_state next_scan_state; @@ -579,11 +566,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, if (sdata->u.mgd.associated) { associated = true; - if (sdata->vif.bss_conf.beacon_int < - min_beacon_int || min_beacon_int == 0) - min_beacon_int = - sdata->vif.bss_conf.beacon_int; - if (!qdisc_all_tx_empty(sdata->dev)) { tx_empty = false; break; @@ -600,34 +582,19 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, * see if we can scan another channel without interfering * with the current traffic situation. * - * Since we don't know if the AP has pending frames for us - * we can only check for our tx queues and use the current - * pm_qos requirements for rx. Hence, if no tx traffic occurs - * at all we will scan as many channels in a row as the pm_qos - * latency allows us to. Additionally we also check for the - * currently negotiated listen interval to prevent losing - * frames unnecessarily. - * - * Otherwise switch back to the operating channel. + * Keep good latency, do not stay off-channel more than 125 ms. 
*/ bad_latency = time_after(jiffies + - ieee80211_scan_get_channel_time(next_chan), - local->leave_oper_channel_time + - usecs_to_jiffies(pm_qos_request(PM_QOS_NETWORK_LATENCY))); - - listen_int_exceeded = time_after(jiffies + - ieee80211_scan_get_channel_time(next_chan), - local->leave_oper_channel_time + - usecs_to_jiffies(min_beacon_int * 1024) * - local->hw.conf.listen_interval); + ieee80211_scan_get_channel_time(next_chan), + local->leave_oper_channel_time + HZ / 8); if (associated && !tx_empty) { if (local->scan_req->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) next_scan_state = SCAN_ABORT; else next_scan_state = SCAN_SUSPEND; - } else if (associated && (bad_latency || listen_int_exceeded)) { + } else if (associated && bad_latency) { next_scan_state = SCAN_SUSPEND; } else { next_scan_state = SCAN_SET_CHANNEL; @@ -690,12 +657,8 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local, local->scan_channel = NULL; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - /* - * Re-enable vifs and beaconing. Leave PS - * in off-channel state..will put that back - * on-channel at the end of scanning. 
- */ - ieee80211_offchannel_return(local, false); + /* disable PS */ + ieee80211_offchannel_return(local); *next_delay = HZ / 5; /* afterwards, resume scan & go to next channel */ @@ -705,8 +668,7 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local, static void ieee80211_scan_state_resume(struct ieee80211_local *local, unsigned long *next_delay) { - /* PS already is in off-channel mode */ - ieee80211_offchannel_stop_vifs(local, false); + ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { drv_flush(local, false); @@ -832,9 +794,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, return res; } -int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, - const u8 *ssid, u8 ssid_len, - struct ieee80211_channel *chan) +int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, + const u8 *ssid, u8 ssid_len, + struct ieee80211_channel *chan) { struct ieee80211_local *local = sdata->local; int ret = -EBUSY; @@ -848,22 +810,36 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, /* fill internal scan request */ if (!chan) { - int i, nchan = 0; + int i, max_n; + int n_ch = 0; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!local->hw.wiphy->bands[band]) continue; - for (i = 0; - i < local->hw.wiphy->bands[band]->n_channels; - i++) { - local->int_scan_req->channels[nchan] = + + max_n = local->hw.wiphy->bands[band]->n_channels; + for (i = 0; i < max_n; i++) { + struct ieee80211_channel *tmp_ch = &local->hw.wiphy->bands[band]->channels[i]; - nchan++; + + if (tmp_ch->flags & (IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_DISABLED)) + continue; + + local->int_scan_req->channels[n_ch] = tmp_ch; + n_ch++; } } - local->int_scan_req->n_channels = nchan; + if (WARN_ON_ONCE(n_ch == 0)) + goto unlock; + + local->int_scan_req->n_channels = n_ch; } else { + if (WARN_ON_ONCE(chan->flags & (IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_DISABLED))) + goto unlock; + 
local->int_scan_req->channels[0] = chan; local->int_scan_req->n_channels = 1; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f3e5025..238a0cc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -91,9 +91,8 @@ static int sta_info_hash_del(struct ieee80211_local *local, return -ENOENT; } -static void free_sta_work(struct work_struct *wk) +static void cleanup_single_sta(struct sta_info *sta) { - struct sta_info *sta = container_of(wk, struct sta_info, free_sta_wk); int ac, i; struct tid_ampdu_tx *tid_tx; struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -105,12 +104,24 @@ static void free_sta_work(struct work_struct *wk) * neither mac80211 nor the driver can reference this * sta struct any more except by still existing timers * associated with this station that we clean up below. + * + * Note though that this still uses the sdata and even + * calls the driver in AP and mesh mode, so interfaces + * of those types mush use call sta_info_flush_cleanup() + * (typically via sta_info_flush()) before deconfiguring + * the driver. + * + * In station mode, nothing happens here so it doesn't + * have to (and doesn't) do that, this is intentional to + * speed up roaming. 
*/ if (test_sta_flag(sta, WLAN_STA_PS_STA)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else return; @@ -126,13 +137,8 @@ static void free_sta_work(struct work_struct *wk) ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]); } -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sdata->vif)) { - mesh_accept_plinks_update(sdata); - mesh_plink_deactivate(sta); - del_timer_sync(&sta->plink_timer); - } -#endif + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_sta_cleanup(sta); cancel_work_sync(&sta->drv_unblock_wk); @@ -153,11 +159,35 @@ static void free_sta_work(struct work_struct *wk) sta_info_free(local, sta); } +void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata) +{ + struct sta_info *sta; + + spin_lock_bh(&sdata->cleanup_stations_lock); + while (!list_empty(&sdata->cleanup_stations)) { + sta = list_first_entry(&sdata->cleanup_stations, + struct sta_info, list); + list_del(&sta->list); + spin_unlock_bh(&sdata->cleanup_stations_lock); + + cleanup_single_sta(sta); + + spin_lock_bh(&sdata->cleanup_stations_lock); + } + + spin_unlock_bh(&sdata->cleanup_stations_lock); +} + static void free_sta_rcu(struct rcu_head *h) { struct sta_info *sta = container_of(h, struct sta_info, rcu_head); + struct ieee80211_sub_if_data *sdata = sta->sdata; - ieee80211_queue_work(&sta->local->hw, &sta->free_sta_wk); + spin_lock(&sdata->cleanup_stations_lock); + list_add_tail(&sta->list, &sdata->cleanup_stations); + spin_unlock(&sdata->cleanup_stations_lock); + + ieee80211_queue_work(&sdata->local->hw, &sdata->cleanup_stations_wk); } /* protected by RCU */ @@ -310,7 +340,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); - INIT_WORK(&sta->free_sta_wk, free_sta_work); INIT_WORK(&sta->ampdu_mlme.work, 
ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -346,12 +375,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < IEEE80211_NUM_TIDS; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); - sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); + sta->sta.smps_mode = IEEE80211_SMPS_OFF; -#ifdef CONFIG_MAC80211_MESH - sta->plink_state = NL80211_PLINK_LISTEN; - init_timer(&sta->plink_timer); -#endif + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; } @@ -547,7 +573,6 @@ void sta_info_recalc_tim(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ps_data *ps; - unsigned long flags; bool indicate_tim = false; u8 ignore_for_tim = sta->sta.uapsd_queues; int ac; @@ -560,6 +585,12 @@ void sta_info_recalc_tim(struct sta_info *sta) ps = &sta->sdata->bss->ps; id = sta->sta.aid; +#ifdef CONFIG_MAC80211_MESH + } else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { + ps = &sta->sdata->u.mesh.ps; + /* TIM map only for PLID <= IEEE80211_MAX_AID */ + id = le16_to_cpu(sta->plid) % IEEE80211_MAX_AID; +#endif } else { return; } @@ -598,7 +629,7 @@ void sta_info_recalc_tim(struct sta_info *sta) } done: - spin_lock_irqsave(&local->tim_lock, flags); + spin_lock_bh(&local->tim_lock); if (indicate_tim) __bss_tim_set(ps->tim, id); @@ -611,7 +642,7 @@ void sta_info_recalc_tim(struct sta_info *sta) local->tim_in_locked_section = false; } - spin_unlock_irqrestore(&local->tim_lock, flags); + spin_unlock_bh(&local->tim_lock); } static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) @@ -718,8 +749,9 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, bool have_buffered = false; int ac; - /* This is only necessary for stations on BSS interfaces */ - if (!sta->sdata->bss) + /* This is only necessary for stations on BSS/MBSS interfaces */ + if (!sta->sdata->bss && + !ieee80211_vif_is_mesh(&sta->sdata->vif)) return false; for (ac = 0; ac < IEEE80211_NUM_ACS; 
ac++) @@ -734,6 +766,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; int ret, i; + bool have_key = false; might_sleep(); @@ -752,7 +785,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) * will be sufficient. */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, false); + ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); ret = sta_info_hash_del(local, sta); if (ret) @@ -761,12 +794,19 @@ int __must_check __sta_info_destroy(struct sta_info *sta) list_del_rcu(&sta->list); mutex_lock(&local->key_mtx); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) + for (i = 0; i < NUM_DEFAULT_KEYS; i++) { __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i])); - if (sta->ptk) + have_key = true; + } + if (sta->ptk) { __ieee80211_key_free(key_mtx_dereference(local, sta->ptk)); + have_key = true; + } mutex_unlock(&local->key_mtx); + if (!have_key) + synchronize_net(); + sta->dead = true; local->num_sta--; @@ -862,21 +902,13 @@ void sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { - del_timer(&local->sta_cleanup); - sta_info_flush(local, NULL); + del_timer_sync(&local->sta_cleanup); } -/** - * sta_info_flush - flush matching STA entries from the STA table - * - * Returns the number of removed STA entries. 
- * - * @local: local interface data - * @sdata: matching rule for the net device (sta->dev) or %NULL to match all STAs - */ -int sta_info_flush(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + +int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; int ret = 0; @@ -884,7 +916,7 @@ int sta_info_flush(struct ieee80211_local *local, mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (!sdata || sdata == sta->sdata) { + if (sdata == sta->sdata) { WARN_ON(__sta_info_destroy(sta)); ret++; } @@ -894,6 +926,12 @@ int sta_info_flush(struct ieee80211_local *local, return ret; } +void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata) +{ + ieee80211_cleanup_sdata_stas(sdata); + cancel_work_sync(&sdata->cleanup_stations_wk); +} + void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { @@ -909,6 +947,11 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, if (time_after(jiffies, sta->last_rx + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); + + if (ieee80211_vif_is_mesh(&sdata->vif) && + test_sta_flag(sta, WLAN_STA_PS_STA)) + atomic_dec(&sdata->u.mesh.ps.num_sta_ps); + WARN_ON(__sta_info_destroy(sta)); } } @@ -967,6 +1010,8 @@ static void clear_sta_ps_flags(void *_sta) if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else return; @@ -1084,6 +1129,8 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); + skb->dev = sdata->dev; + rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (WARN_ON(!chanctx_conf)) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 1489bca..4947341 100644 
--- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -56,6 +56,8 @@ * @WLAN_STA_INSERTED: This station is inserted into the hash table. * @WLAN_STA_RATE_CONTROL: rate control was initialized for this station. * @WLAN_STA_TOFFSET_KNOWN: toffset calculated for this station is valid. + * @WLAN_STA_MPSP_OWNER: local STA is owner of a mesh Peer Service Period. + * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH, @@ -78,6 +80,8 @@ enum ieee80211_sta_info_flags { WLAN_STA_INSERTED, WLAN_STA_RATE_CONTROL, WLAN_STA_TOFFSET_KNOWN, + WLAN_STA_MPSP_OWNER, + WLAN_STA_MPSP_RECIPIENT, }; #define ADDBA_RESP_INTERVAL HZ @@ -92,6 +96,13 @@ enum ieee80211_sta_info_flags { #define HT_AGG_STATE_WANT_START 4 #define HT_AGG_STATE_WANT_STOP 5 +enum ieee80211_agg_stop_reason { + AGG_STOP_DECLINED, + AGG_STOP_LOCAL_REQUEST, + AGG_STOP_PEER_REQUEST, + AGG_STOP_DESTROY_STA, +}; + /** * struct tid_ampdu_tx - TID aggregation information (Tx). * @@ -274,7 +285,9 @@ struct sta_ampdu_mlme { * @t_offset: timing offset relative to this host * @t_offset_setpoint: reference timing offset of this sta to be used when * calculating clockdrift - * @ch_width: peer's channel width + * @local_pm: local link-specific power save mode + * @peer_pm: peer-specific power save mode towards local STA + * @nonpeer_pm: STA power save mode towards non-peer neighbors * @debugfs: debug filesystem info * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver @@ -282,8 +295,9 @@ struct sta_ampdu_mlme { * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @beacon_loss_count: number of times beacon loss has triggered - * @supports_40mhz: tracks whether the station advertised 40 MHz support - * as we overwrite its HT parameters with the currently used value + * @rcu_head: RCU head used for freeing this station struct + * @cur_max_bandwidth: 
maximum bandwidth to use for TX to the station, + * taken from HT/VHT capabilities or VHT operating mode notification */ struct sta_info { /* General information, mostly static */ @@ -299,7 +313,6 @@ struct sta_info { spinlock_t lock; struct work_struct drv_unblock_wk; - struct work_struct free_sta_wk; u16 listen_interval; @@ -372,7 +385,10 @@ struct sta_info { struct timer_list plink_timer; s64 t_offset; s64 t_offset_setpoint; - enum nl80211_chan_width ch_width; + /* mesh power save */ + enum nl80211_mesh_power_mode local_pm; + enum nl80211_mesh_power_mode peer_pm; + enum nl80211_mesh_power_mode nonpeer_pm; #endif #ifdef CONFIG_MAC80211_DEBUGFS @@ -382,11 +398,11 @@ struct sta_info { } debugfs; #endif + enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; + unsigned int lost_packets; unsigned int beacon_loss_count; - bool supports_40mhz; - /* keep last! */ struct ieee80211_sta sta; }; @@ -549,8 +565,39 @@ void sta_info_recalc_tim(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); -int sta_info_flush(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata); +int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata); + +/** + * sta_info_flush_cleanup - flush the sta_info cleanup queue + * @sdata: the interface + * + * Flushes the sta_info cleanup queue for a given interface; + * this is necessary before the interface is removed or, for + * AP/mesh interfaces, before it is deconfigured. + * + * Note an rcu_barrier() must precede the function, after all + * stations have been flushed/removed to ensure the call_rcu() + * calls that add stations to the cleanup queue have completed. + */ +void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); + +/** + * sta_info_flush - flush matching STA entries from the STA table + * + * Returns the number of removed STA entries. 
+ * + * @sdata: sdata to remove all stations from + */ +static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) +{ + int ret = sta_info_flush_defer(sdata); + + rcu_barrier(); + sta_info_flush_cleanup(sdata); + + return ret; +} + void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo); @@ -563,4 +610,6 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); +void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata); + #endif /* STA_INFO_H */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 07d9957..4343920 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -335,7 +335,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, if (dropped) acked = false; - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { + if (info->flags & (IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_INTFL_MLME_CONN_TX)) { struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_sub_if_data *iter_sdata; u64 cookie = (unsigned long)skb; @@ -357,10 +358,13 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, sdata = rcu_dereference(local->p2p_sdata); } - if (!sdata) + if (!sdata) { skb->dev = NULL; - else if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { + } else if (info->flags & IEEE80211_TX_INTFL_MLME_CONN_TX) { + ieee80211_mgd_conn_tx_status(sdata, hdr->frame_control, + acked); + } else if (ieee80211_is_nullfunc(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) { cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); } else { @@ -468,6 +472,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } + /* mesh Peer Service Period support */ + if (ieee80211_vif_is_mesh(&sta->sdata->vif) 
&& + ieee80211_is_data_qos(fc)) + ieee80211_mpsp_trigger_process( + ieee80211_get_qos_ctl(hdr), + sta, true, acked); + if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) && (rates_idx != -1)) sta->last_tx_rate = info->status.rates[rates_idx]; @@ -502,11 +513,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) IEEE80211_BAR_CTRL_TID_INFO_MASK) >> IEEE80211_BAR_CTRL_TID_INFO_SHIFT; - if (local->hw.flags & - IEEE80211_HW_TEARDOWN_AGGR_ON_BAR_FAIL) - ieee80211_stop_tx_ba_session(&sta->sta, tid); - else - ieee80211_set_bar_pending(sta, tid, ssn); + ieee80211_set_bar_pending(sta, tid, ssn); } } diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 57e14d5..3ed801d 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -177,12 +177,11 @@ void ieee80211_get_tkip_p1k_iv(struct ieee80211_key_conf *keyconf, struct ieee80211_key *key = (struct ieee80211_key *) container_of(keyconf, struct ieee80211_key, conf); struct tkip_ctx *ctx = &key->u.tkip.tx; - unsigned long flags; - spin_lock_irqsave(&key->u.tkip.txlock, flags); + spin_lock_bh(&key->u.tkip.txlock); ieee80211_compute_tkip_p1k(key, iv32); memcpy(p1k, ctx->p1k, sizeof(ctx->p1k)); - spin_unlock_irqrestore(&key->u.tkip.txlock, flags); + spin_unlock_bh(&key->u.tkip.txlock); } EXPORT_SYMBOL(ieee80211_get_tkip_p1k_iv); @@ -208,12 +207,11 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf, const u8 *data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control); u32 iv32 = get_unaligned_le32(&data[4]); u16 iv16 = data[2] | (data[0] << 8); - unsigned long flags; - spin_lock_irqsave(&key->u.tkip.txlock, flags); + spin_lock_bh(&key->u.tkip.txlock); ieee80211_compute_tkip_p1k(key, iv32); tkip_mixing_phase2(tk, ctx, iv16, p2k); - spin_unlock_irqrestore(&key->u.tkip.txlock, flags); + spin_unlock_bh(&key->u.tkip.txlock); } EXPORT_SYMBOL(ieee80211_get_tkip_p2k); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index a8270b4..3d7cd2a 100644 --- a/net/mac80211/trace.h +++ 
b/net/mac80211/trace.h @@ -28,21 +28,27 @@ #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" -#define CHANCTX_ENTRY __field(u32, control_freq) \ +#define CHANDEF_ENTRY __field(u32, control_freq) \ __field(u32, chan_width) \ __field(u32, center_freq1) \ - __field(u32, center_freq2) \ + __field(u32, center_freq2) +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c)->chan->center_freq; \ + __entry->chan_width = (c)->width; \ + __entry->center_freq1 = (c)->center_freq1; \ + __entry->center_freq2 = (c)->center_freq2; +#define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" +#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ + __entry->center_freq1, __entry->center_freq2 + +#define CHANCTX_ENTRY CHANDEF_ENTRY \ __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) -#define CHANCTX_ASSIGN __entry->control_freq = ctx->conf.def.chan->center_freq;\ - __entry->chan_width = ctx->conf.def.width; \ - __entry->center_freq1 = ctx->conf.def.center_freq1; \ - __entry->center_freq2 = ctx->conf.def.center_freq2; \ +#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic -#define CHANCTX_PR_FMT " control:%d MHz width:%d center: %d/%d MHz chains:%d/%d" -#define CHANCTX_PR_ARG __entry->control_freq, __entry->chan_width, \ - __entry->center_freq1, __entry->center_freq2, \ +#define CHANCTX_PR_FMT CHANDEF_PR_FMT " chains:%d/%d" +#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic @@ -334,6 +340,7 @@ TRACE_EVENT(drv_bss_info_changed, __field(u16, assoc_cap) __field(u64, sync_tsf) __field(u32, sync_device_ts) + __field(u8, sync_dtim_count) __field(u32, basic_rates) __array(int, mcast_rate, IEEE80211_NUM_BANDS) __field(u16, ht_operation_mode) @@ -341,8 +348,11 @@ TRACE_EVENT(drv_bss_info_changed, __field(s32, cqm_rssi_hyst); 
__field(u32, channel_width); __field(u32, channel_cfreq1); - __dynamic_array(u32, arp_addr_list, info->arp_addr_cnt); - __field(bool, arp_filter_enabled); + __dynamic_array(u32, arp_addr_list, + info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? + IEEE80211_BSS_ARP_ADDR_LIST_LEN : + info->arp_addr_cnt); + __field(int, arp_addr_cnt); __field(bool, qos); __field(bool, idle); __field(bool, ps); @@ -370,6 +380,7 @@ TRACE_EVENT(drv_bss_info_changed, __entry->assoc_cap = info->assoc_capability; __entry->sync_tsf = info->sync_tsf; __entry->sync_device_ts = info->sync_device_ts; + __entry->sync_dtim_count = info->sync_dtim_count; __entry->basic_rates = info->basic_rates; memcpy(__entry->mcast_rate, info->mcast_rate, sizeof(__entry->mcast_rate)); @@ -378,9 +389,11 @@ TRACE_EVENT(drv_bss_info_changed, __entry->cqm_rssi_hyst = info->cqm_rssi_hyst; __entry->channel_width = info->chandef.width; __entry->channel_cfreq1 = info->chandef.center_freq1; + __entry->arp_addr_cnt = info->arp_addr_cnt; memcpy(__get_dynamic_array(arp_addr_list), info->arp_addr_list, - sizeof(u32) * info->arp_addr_cnt); - __entry->arp_filter_enabled = info->arp_filter_enabled; + sizeof(u32) * (info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? 
+ IEEE80211_BSS_ARP_ADDR_LIST_LEN : + info->arp_addr_cnt)); __entry->qos = info->qos; __entry->idle = info->idle; __entry->ps = info->ps; @@ -466,7 +479,7 @@ TRACE_EVENT(drv_set_tim, TP_printk( LOCAL_PR_FMT STA_PR_FMT " set:%d", - LOCAL_PR_ARG, STA_PR_FMT, __entry->set + LOCAL_PR_ARG, STA_PR_ARG, __entry->set ) ); @@ -1178,23 +1191,26 @@ TRACE_EVENT(drv_set_rekey_data, TRACE_EVENT(drv_rssi_callback, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, enum ieee80211_rssi_event rssi_event), - TP_ARGS(local, rssi_event), + TP_ARGS(local, sdata, rssi_event), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(u32, rssi_event) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->rssi_event = rssi_event; ), TP_printk( - LOCAL_PR_FMT " rssi_event:%d", - LOCAL_PR_ARG, __entry->rssi_event + LOCAL_PR_FMT VIF_PR_FMT " rssi_event:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->rssi_event ) ); @@ -1426,6 +1442,14 @@ DEFINE_EVENT(local_only_evt, drv_restart_complete, TP_ARGS(local) ); +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_EVENT(local_sdata_evt, drv_ipv6_addr_change, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) +); +#endif + /* * Tracing for API calls that drivers call. 
*/ @@ -1660,7 +1684,7 @@ TRACE_EVENT(api_sta_block_awake, TP_printk( LOCAL_PR_FMT STA_PR_FMT " block:%d", - LOCAL_PR_ARG, STA_PR_FMT, __entry->block + LOCAL_PR_ARG, STA_PR_ARG, __entry->block ) ); @@ -1758,7 +1782,7 @@ TRACE_EVENT(api_eosp, TP_printk( LOCAL_PR_FMT STA_PR_FMT, - LOCAL_PR_ARG, STA_PR_FMT + LOCAL_PR_ARG, STA_PR_ARG ) ); @@ -1815,6 +1839,48 @@ TRACE_EVENT(stop_queue, ) ); +TRACE_EVENT(drv_set_default_unicast_key, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int key_idx), + + TP_ARGS(local, sdata, key_idx), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(int, key_idx) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->key_idx = key_idx; + ), + + TP_printk(LOCAL_PR_FMT VIF_PR_FMT " key_idx:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->key_idx) +); + +TRACE_EVENT(api_radar_detected, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT " radar detected", + LOCAL_PR_ARG + ) +); + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e9eadc4..8914d2d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -329,6 +329,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->u.ap.ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else continue; @@ -372,18 +374,20 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) /* * broadcast/multicast frame * - * If any of the associated stations is in power save mode, + * If any of the associated/peer stations is in power save mode, * the frame is buffered to be sent after DTIM beacon frame. * This is done either by the hardware or us. 
*/ - /* powersaving STAs currently only in AP/VLAN mode */ + /* powersaving STAs currently only in AP/VLAN/mesh mode */ if (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (!tx->sdata->bss) return TX_CONTINUE; ps = &tx->sdata->bss->ps; + } else if (ieee80211_vif_is_mesh(&tx->sdata->vif)) { + ps = &tx->sdata->u.mesh.ps; } else { return TX_CONTINUE; } @@ -594,7 +598,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) break; } - if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED)) + if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED && + !ieee80211_is_deauth(hdr->frame_control))) return TX_DROP; if (!skip_hw && tx->key && @@ -1225,20 +1230,41 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (local->queue_stop_reasons[q] || (!txpending && !skb_queue_empty(&local->pending[q]))) { - /* - * Since queue is stopped, queue up frames for later - * transmission from the tx-pending tasklet when the - * queue is woken again. - */ - if (txpending) - skb_queue_splice_init(skbs, &local->pending[q]); - else - skb_queue_splice_tail_init(skbs, - &local->pending[q]); + if (unlikely(info->flags & + IEEE80211_TX_INTFL_OFFCHAN_TX_OK)) { + if (local->queue_stop_reasons[q] & + ~BIT(IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL)) { + /* + * Drop off-channel frames if queues + * are stopped for any reason other + * than off-channel operation. Never + * queue them. + */ + spin_unlock_irqrestore( + &local->queue_stop_reason_lock, + flags); + ieee80211_purge_tx_queue(&local->hw, + skbs); + return true; + } + } else { - spin_unlock_irqrestore(&local->queue_stop_reason_lock, - flags); - return false; + /* + * Since queue is stopped, queue up frames for + * later transmission from the tx-pending + * tasklet when the queue is woken again. 
+ */ + if (txpending) + skb_queue_splice_init(skbs, + &local->pending[q]); + else + skb_queue_splice_tail_init(skbs, + &local->pending[q]); + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, + flags); + return false; + } } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -1472,12 +1498,14 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, hdr = (struct ieee80211_hdr *) skb->data; info->control.vif = &sdata->vif; - if (ieee80211_vif_is_mesh(&sdata->vif) && - ieee80211_is_data(hdr->frame_control) && - !is_multicast_ether_addr(hdr->addr1) && - mesh_nexthop_resolve(skb, sdata)) { - /* skb queued: don't free */ - return; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + if (ieee80211_is_data(hdr->frame_control) && + is_unicast_ether_addr(hdr->addr1)) { + if (mesh_nexthop_resolve(sdata, skb)) + return; /* skb queued: don't free */ + } else { + ieee80211_mps_set_frame_flags(sdata, NULL, hdr); + } } ieee80211_set_qos_hdr(sdata, skb); @@ -1673,10 +1701,13 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, chanctx_conf = rcu_dereference(tmp_sdata->vif.chanctx_conf); } - if (!chanctx_conf) - goto fail_rcu; - chan = chanctx_conf->def.chan; + if (chanctx_conf) + chan = chanctx_conf->def.chan; + else if (!local->use_chanctx) + chan = local->_oper_channel; + else + goto fail_rcu; /* * Frame injection is not allowed if beaconing is not allowed @@ -1784,16 +1815,16 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; /* fall through */ case NL80211_IFTYPE_AP: + if (sdata->vif.type == NL80211_IFTYPE_AP) + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!chanctx_conf) + goto fail_rcu; fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 24; - if (sdata->vif.type == NL80211_IFTYPE_AP) - chanctx_conf = 
rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) - goto fail_rcu; band = chanctx_conf->def.chan->band; break; case NL80211_IFTYPE_WDS: @@ -1819,9 +1850,24 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, } if (!is_multicast_ether_addr(skb->data)) { - mpath = mesh_path_lookup(skb->data, sdata); - if (!mpath) - mppath = mpp_path_lookup(skb->data, sdata); + struct sta_info *next_hop; + bool mpp_lookup = true; + + mpath = mesh_path_lookup(sdata, skb->data); + if (mpath) { + mpp_lookup = false; + next_hop = rcu_dereference(mpath->next_hop); + if (!next_hop || + !(mpath->flags & (MESH_PATH_ACTIVE | + MESH_PATH_RESOLVING))) + mpp_lookup = true; + } + + if (mpp_lookup) + mppath = mpp_path_lookup(sdata, skb->data); + + if (mppath && mpath) + mesh_path_del(mpath->sdata, mpath->dst); } /* @@ -1834,8 +1880,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, !(mppath && !ether_addr_equal(mppath->mpp, skb->data))) { hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, skb->data, skb->data + ETH_ALEN); - meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, - sdata, NULL, NULL); + meshhdrlen = ieee80211_new_mesh_header(sdata, &mesh_hdr, + NULL, NULL); } else { /* DS -> MBSS (802.11-2012 13.11.3.3). 
* For unicast with unknown forwarding information, @@ -1854,18 +1900,14 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, mesh_da, sdata->vif.addr); if (is_multicast_ether_addr(mesh_da)) /* DA TA mSA AE:SA */ - meshhdrlen = - ieee80211_new_mesh_header(&mesh_hdr, - sdata, - skb->data + ETH_ALEN, - NULL); + meshhdrlen = ieee80211_new_mesh_header( + sdata, &mesh_hdr, + skb->data + ETH_ALEN, NULL); else /* RA TA mDA mSA AE:DA SA */ - meshhdrlen = - ieee80211_new_mesh_header(&mesh_hdr, - sdata, - skb->data, - skb->data + ETH_ALEN); + meshhdrlen = ieee80211_new_mesh_header( + sdata, &mesh_hdr, skb->data, + skb->data + ETH_ALEN); } chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); @@ -1996,24 +2038,14 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, skb = skb_clone(skb, GFP_ATOMIC); if (skb) { unsigned long flags; - int id, r; + int id; spin_lock_irqsave(&local->ack_status_lock, flags); - r = idr_get_new_above(&local->ack_status_frames, - orig_skb, 1, &id); - if (r == -EAGAIN) { - idr_pre_get(&local->ack_status_frames, - GFP_ATOMIC); - r = idr_get_new_above(&local->ack_status_frames, - orig_skb, 1, &id); - } - if (WARN_ON(!id) || id > 0xffff) { - idr_remove(&local->ack_status_frames, id); - r = -ERANGE; - } + id = idr_alloc(&local->ack_status_frames, orig_skb, + 1, 0x10000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, flags); - if (!r) { + if (id >= 0) { info_id = id; info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; } else if (skb_shared(skb)) { @@ -2261,9 +2293,8 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ -static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, - struct sk_buff *skb) +static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, + struct ps_data *ps, struct sk_buff *skb) { u8 *pos, *tim; int aid0 = 0; @@ -2325,6 +2356,29 @@ static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, } } 
+static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, + struct ps_data *ps, struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + + /* + * Not very nice, but we want to allow the driver to call + * ieee80211_beacon_get() as a response to the set_tim() + * callback. That, however, is already invoked under the + * sta_lock to guarantee consistent and race-free update + * of the tim bitmap in mac80211 and the driver. + */ + if (local->tim_in_locked_section) { + __ieee80211_beacon_add_tim(sdata, ps, skb); + } else { + spin_lock_bh(&local->tim_lock); + __ieee80211_beacon_add_tim(sdata, ps, skb); + spin_unlock_bh(&local->tim_lock); + } + + return 0; +} + struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 *tim_offset, u16 *tim_length) @@ -2369,22 +2423,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - /* - * Not very nice, but we want to allow the driver to call - * ieee80211_beacon_get() as a response to the set_tim() - * callback. That, however, is already invoked under the - * sta_lock to guarantee consistent and race-free update - * of the tim bitmap in mac80211 and the driver. 
- */ - if (local->tim_in_locked_section) { - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); - } else { - unsigned long flags; - - spin_lock_irqsave(&local->tim_lock, flags); - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); - spin_unlock_irqrestore(&local->tim_lock, flags); - } + ieee80211_beacon_add_tim(sdata, &ap->ps, skb); if (tim_offset) *tim_offset = beacon->head_len; @@ -2412,66 +2451,26 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); } else if (ieee80211_vif_is_mesh(&sdata->vif)) { - struct ieee80211_mgmt *mgmt; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - u8 *pos; - int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + - sizeof(mgmt->u.beacon); + struct beacon_data *bcn = rcu_dereference(ifmsh->beacon); -#ifdef CONFIG_MAC80211_MESH - if (!sdata->u.mesh.mesh_id_len) + if (!bcn) goto out; -#endif if (ifmsh->sync_ops) ifmsh->sync_ops->adjust_tbtt( sdata); skb = dev_alloc_skb(local->tx_headroom + - hdr_len + - 2 + /* NULL SSID */ - 2 + 8 + /* supported rates */ - 2 + 3 + /* DS params */ - 2 + (IEEE80211_MAX_SUPP_RATES - 8) + - 2 + sizeof(struct ieee80211_ht_cap) + - 2 + sizeof(struct ieee80211_ht_operation) + - 2 + sdata->u.mesh.mesh_id_len + - 2 + sizeof(struct ieee80211_meshconf_ie) + - sdata->u.mesh.ie_len); + bcn->head_len + + 256 + /* TIM IE */ + bcn->tail_len); if (!skb) goto out; - - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); - memset(mgmt, 0, hdr_len); - mgmt->frame_control = - cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); - eth_broadcast_addr(mgmt->da); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); - mgmt->u.beacon.beacon_int = - cpu_to_le16(sdata->vif.bss_conf.beacon_int); - mgmt->u.beacon.capab_info |= cpu_to_le16( - sdata->u.mesh.security ? 
WLAN_CAPABILITY_PRIVACY : 0); - - pos = skb_put(skb, 2); - *pos++ = WLAN_EID_SSID; - *pos++ = 0x0; - - band = chanctx_conf->def.chan->band; - - if (ieee80211_add_srates_ie(sdata, skb, true, band) || - mesh_add_ds_params_ie(skb, sdata) || - ieee80211_add_ext_srates_ie(sdata, skb, true, band) || - mesh_add_rsn_ie(skb, sdata) || - mesh_add_ht_cap_ie(skb, sdata) || - mesh_add_ht_oper_ie(skb, sdata) || - mesh_add_meshid_ie(skb, sdata) || - mesh_add_meshconf_ie(skb, sdata) || - mesh_add_vendor_ies(skb, sdata)) { - pr_err("o11s: couldn't add ies!\n"); - goto out; - } + skb_reserve(skb, local->tx_headroom); + memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len); + ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb); + memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len); } else { WARN_ON(1); goto out; @@ -2721,6 +2720,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, goto out; ps = &sdata->u.ap.ps; + } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + ps = &sdata->u.mesh.ps; } else { goto out; } @@ -2744,6 +2745,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); if (!ieee80211_tx_prepare(sdata, &tx, skb)) break; dev_kfree_skb_any(skb); @@ -2776,6 +2779,8 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, skb_set_queue_mapping(skb, ac); skb->priority = tid; + skb->dev = sdata->dev; + /* * The other path calling ieee80211_xmit is from the tasklet, * and while we can handle concurrent transmissions locking diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f11e8c5..0f38f43 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -739,11 +739,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, if (calc_crc) crc = crc32_be(crc, pos - 2, elen + 2); - if (pos[3] == 1) { - /* OUI Type 1 - WPA IE */ - elems->wpa = pos; - elems->wpa_len = elen; - } else if (elen >= 5 && pos[3] == 2) 
{ + if (elen >= 5 && pos[3] == 2) { /* OUI Type 2 - WMM IE */ if (pos[4] == 0) { elems->wmm_info = pos; @@ -791,6 +787,12 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, else elem_parse_failed = true; break; + case WLAN_EID_OPMODE_NOTIF: + if (elen > 0) + elems->opmode_notif = pos; + else + elem_parse_failed = true; + break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; elems->mesh_id_len = elen; @@ -805,6 +807,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->peering = pos; elems->peering_len = elen; break; + case WLAN_EID_MESH_AWAKE_WINDOW: + if (elen >= 2) + elems->awake_window = (void *)pos; + break; case WLAN_EID_PREQ: elems->preq = pos; elems->preq_len = elen; @@ -1029,8 +1035,9 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *da, - const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx) + const u8 *extra, size_t extra_len, const u8 *da, + const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx, + u32 tx_flags) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -1063,7 +1070,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, WARN_ON(err); } - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + tx_flags; ieee80211_tx_skb(sdata, skb); } @@ -1277,7 +1285,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck, + u32 ratemask, bool directed, u32 tx_flags, struct ieee80211_channel *channel, bool scan) { struct sk_buff *skb; @@ -1286,9 +1294,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, ssid, ssid_len, ie, ie_len, directed); 
if (skb) { - if (no_cck) - IEEE80211_SKB_CB(skb)->flags |= - IEEE80211_TX_CTL_NO_CCK_RATE; + IEEE80211_SKB_CB(skb)->flags |= tx_flags; if (scan) ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); else @@ -1358,6 +1364,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; struct sta_info *sta; int res, i; + bool reconfig_due_to_wowlan = false; #ifdef CONFIG_PM if (local->suspended) @@ -1377,6 +1384,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) * res is 1, which means the driver requested * to go through a regular reset on wakeup. */ + reconfig_due_to_wowlan = true; } #endif /* everything else happens only if HW was up & running */ @@ -1526,11 +1534,20 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_IDLE | BSS_CHANGED_TXPOWER; +#ifdef CONFIG_PM + if (local->resuming && !reconfig_due_to_wowlan) + sdata->vif.bss_conf = sdata->suspend_bss_conf; +#endif + switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: changed |= BSS_CHANGED_ASSOC | BSS_CHANGED_ARP_FILTER | BSS_CHANGED_PS; + + if (sdata->u.mgd.dtim_period) + changed |= BSS_CHANGED_DTIM_PERIOD; + mutex_lock(&sdata->u.mgd.mtx); ieee80211_bss_info_change_notify(sdata, changed); mutex_unlock(&sdata->u.mgd.mtx); @@ -1550,9 +1567,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* fall through */ case NL80211_IFTYPE_MESH_POINT: - changed |= BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED; - ieee80211_bss_info_change_notify(sdata, changed); + if (sdata->vif.bss_conf.enable_beacon) { + changed |= BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED; + ieee80211_bss_info_change_notify(sdata, changed); + } break; case NL80211_IFTYPE_WDS: break; @@ -1632,7 +1651,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions( + sta, AGG_STOP_LOCAL_REQUEST); clear_sta_flag(sta, 
WLAN_STA_BLOCK_BA); } @@ -1646,10 +1666,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) * If this is for hw restart things are still running. * We may want to change that later, however. */ - if (!local->suspended) { + if (!local->suspended || reconfig_due_to_wowlan) drv_restart_complete(local); + + if (!local->suspended) return 0; - } #ifdef CONFIG_PM /* first set suspended false, then resuming */ @@ -1864,7 +1885,7 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, } u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, - u32 cap) + u32 cap) { __le32 tmp; @@ -1926,7 +1947,7 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, } void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef) { enum nl80211_channel_type channel_type; @@ -2114,3 +2135,49 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, return ts; } + +void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + ieee80211_vif_release_channel(sdata); + cfg80211_cac_event(sdata->dev, + NL80211_RADAR_CAC_ABORTED, + GFP_KERNEL); + } + } + mutex_unlock(&local->iflist_mtx); +} + +void ieee80211_dfs_radar_detected_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, radar_detected_work); + struct cfg80211_chan_def chandef; + + ieee80211_dfs_cac_cancel(local); + + if (local->use_chanctx) + /* currently not handled */ + WARN_ON(1); + else { + cfg80211_chandef_create(&chandef, local->hw.conf.channel, + local->hw.conf.channel_type); + cfg80211_radar_event(local->hw.wiphy, &chandef, 
GFP_KERNEL); + } +} + +void ieee80211_radar_detected(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_radar_detected(local); + + ieee80211_queue_work(hw, &local->radar_detected_work); +} +EXPORT_SYMBOL(ieee80211_radar_detected); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index f311388..a2c2258 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -10,21 +10,29 @@ #include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" +#include "rate.h" -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_vht_cap *vht_cap_ie, - struct ieee80211_sta_vht_cap *vht_cap) +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta) { - if (WARN_ON_ONCE(!vht_cap)) - return; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; memset(vht_cap, 0, sizeof(*vht_cap)); + if (!sta->sta.ht_cap.ht_supported) + return; + if (!vht_cap_ie || !sband->vht_cap.vht_supported) return; + /* A VHT STA must support 40 MHz */ + if (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) + return; + vht_cap->vht_supported = true; vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info); @@ -32,4 +40,156 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* Copy peer MCS info, the driver might need them. 
*/ memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); + + switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + default: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + } + + sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); +} + +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 cap = sta->sta.vht_cap.cap; + enum ieee80211_sta_rx_bandwidth bw; + + if (!sta->sta.vht_cap.vht_supported) { + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + goto check_max; + } + + switch (sdata->vif.bss_conf.chandef.width) { + default: + WARN_ON_ONCE(1); + /* fall through */ + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? 
+ IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + break; + case NL80211_CHAN_WIDTH_160: + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } + /* fall through */ + case NL80211_CHAN_WIDTH_80P80: + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } + /* fall through */ + case NL80211_CHAN_WIDTH_80: + bw = IEEE80211_STA_RX_BW_80; + } + + check_max: + if (bw > sta->cur_max_bandwidth) + bw = sta->cur_max_bandwidth; + return bw; +} + +void ieee80211_sta_set_rx_nss(struct sta_info *sta) +{ + u8 ht_rx_nss = 0, vht_rx_nss = 0; + + /* if we received a notification already don't overwrite it */ + if (sta->sta.rx_nss) + return; + + if (sta->sta.ht_cap.ht_supported) { + if (sta->sta.ht_cap.mcs.rx_mask[0]) + ht_rx_nss++; + if (sta->sta.ht_cap.mcs.rx_mask[1]) + ht_rx_nss++; + if (sta->sta.ht_cap.mcs.rx_mask[2]) + ht_rx_nss++; + if (sta->sta.ht_cap.mcs.rx_mask[3]) + ht_rx_nss++; + /* FIXME: consider rx_highest? */ + } + + if (sta->sta.vht_cap.vht_supported) { + int i; + u16 rx_mcs_map; + + rx_mcs_map = le16_to_cpu(sta->sta.vht_cap.vht_mcs.rx_mcs_map); + + for (i = 7; i >= 0; i--) { + u8 mcs = (rx_mcs_map >> (2 * i)) & 3; + + if (mcs != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + vht_rx_nss = i + 1; + break; + } + } + /* FIXME: consider rx_highest? 
*/ + } + + ht_rx_nss = max(ht_rx_nss, vht_rx_nss); + sta->sta.rx_nss = max_t(u8, 1, ht_rx_nss); +} + +/* + * Apply an operating mode notification byte (@opmode) to @sta. + * Notifications with the beamforming RX NSS type bit set are ignored. + * Otherwise rx_nss is updated from the NSS field (IEEE80211_RC_NSS_CHANGED) + * and, unless @nss_only is set, cur_max_bandwidth is taken from the channel + * width field and the resulting station bandwidth is recomputed + * (IEEE80211_RC_BW_CHANGED). Rate control is notified only if something + * actually changed. + */ +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band, bool nss_only) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + enum ieee80211_sta_rx_bandwidth new_bw; + u32 changed = 0; + u8 nss; + + sband = local->hw.wiphy->bands[band]; + + /* ignore - no support for BF yet */ + if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF) + return; + + nss = opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; + nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; + nss += 1; + + if (sta->sta.rx_nss != nss) { + sta->sta.rx_nss = nss; + changed |= IEEE80211_RC_NSS_CHANGED; + } + + if (nss_only) + goto change; + + switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + } + + new_bw = ieee80211_sta_cur_vht_bw(sta); + if (new_bw != sta->sta.bandwidth) { + sta->sta.bandwidth = new_bw; + changed |= IEEE80211_RC_BW_CHANGED; + } + + change: + if (changed) + rate_control_rate_update(local, sband, sta, changed); } diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 906f00c..afba19c 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -191,6 +191,15 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, /* qos header is 2 bytes */ *p++ = ack_policy | tid; - *p = ieee80211_vif_is_mesh(&sdata->vif) ?
- (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8) : 0; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* preserve RSPI and Mesh PS Level bit */ + *p &= ((IEEE80211_QOS_CTL_RSPI | + IEEE80211_QOS_CTL_MESH_PS_LEVEL) >> 8); + + /* Nulls don't have a mesh header (frame body) */ + if (!ieee80211_is_qos_nullfunc(hdr->frame_control)) + *p |= (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8); + } else { + *p = 0; + } } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index c175ee8..c7c6d64 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -181,7 +181,6 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - unsigned long flags; unsigned int hdrlen; int len, tail; u8 *pos; @@ -216,12 +215,12 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) return 0; /* Increase IV for the frame */ - spin_lock_irqsave(&key->u.tkip.txlock, flags); + spin_lock(&key->u.tkip.txlock); key->u.tkip.tx.iv16++; if (key->u.tkip.tx.iv16 == 0) key->u.tkip.tx.iv32++; pos = ieee80211_tkip_add_iv(pos, key); - spin_unlock_irqrestore(&key->u.tkip.txlock, flags); + spin_unlock(&key->u.tkip.txlock); /* hwaccel - with software IV */ if (info->control.hw_key) diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig index a967dda..b33dd76 100644 --- a/net/mac802154/Kconfig +++ b/net/mac802154/Kconfig @@ -1,6 +1,6 @@ config MAC802154 tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)" - depends on IEEE802154 && EXPERIMENTAL + depends on IEEE802154 select CRC_CCITT ---help--- This option enables the hardware independent IEEE 802.15.4 diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 1191039..d20c6d3 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -41,7 +41,7 @@ static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val) return -EINVAL; 
*val = skb->data[0]; - skb_pull(skb, 1); + skb_pull(skb, 1); return 0; } @@ -137,16 +137,12 @@ static int mac802154_header_create(struct sk_buff *skb, struct ieee802154_addr dev_addr; struct mac802154_sub_if_data *priv = netdev_priv(dev); int pos = 2; - u8 *head; + u8 head[MAC802154_FRAME_HARD_HEADER_LEN]; u16 fc; if (!daddr) return -EINVAL; - head = kzalloc(MAC802154_FRAME_HARD_HEADER_LEN, GFP_KERNEL); - if (head == NULL) - return -ENOMEM; - head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ fc = mac_cb_type(skb); @@ -210,7 +206,6 @@ static int mac802154_header_create(struct sk_buff *skb, head[1] = fc >> 8; memcpy(skb_push(skb, pos), head, pos); - kfree(head); return pos; } @@ -389,7 +384,7 @@ void mac802154_wpan_setup(struct net_device *dev) static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) { - return netif_rx(skb); + return netif_rx_ni(skb); } static int diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index fefa514..56d22ca 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -124,9 +124,14 @@ config NF_CONNTRACK_TIMESTAMP If unsure, say `N'. +config NF_CONNTRACK_LABELS + bool + help + This option enables support for assigning user-defined flag bits + to connection tracking entries. It is selected by the connlabel match. + config NF_CT_PROTO_DCCP - tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate 'DCCP protocol connection tracking support' depends on NETFILTER_ADVANCED default IP_DCCP help @@ -139,8 +144,7 @@ config NF_CT_PROTO_GRE tristate config NF_CT_PROTO_SCTP - tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED default IP_SCTP help @@ -281,8 +285,7 @@ config NF_CONNTRACK_PPTP To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_SANE - tristate "SANE protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "SANE protocol support" depends on NETFILTER_ADVANCED help SANE is a protocol for remote access to scanners as implemented @@ -409,8 +412,7 @@ endif # NF_CONNTRACK # transparent proxy support config NETFILTER_TPROXY - tristate "Transparent proxying support (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "Transparent proxying support" depends on IP_NF_MANGLE depends on NETFILTER_ADVANCED help @@ -680,6 +682,13 @@ config NETFILTER_XT_TARGET_NFQUEUE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_NOTRACK + tristate '"NOTRACK" target support (DEPRECATED)' + depends on NF_CONNTRACK + depends on IP_NF_RAW || IP6_NF_RAW + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_CT + config NETFILTER_XT_TARGET_RATEEST tristate '"RATEEST" target support' depends on NETFILTER_ADVANCED @@ -711,8 +720,7 @@ config NETFILTER_XT_TARGET_TEE this clone be rerouted to another nexthop. config NETFILTER_XT_TARGET_TPROXY - tristate '"TPROXY" target support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate '"TPROXY" target support' depends on NETFILTER_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED @@ -776,8 +784,7 @@ config NETFILTER_XT_TARGET_TCPMSS To compile it as a module, choose M here. If unsure, say N. config NETFILTER_XT_TARGET_TCPOPTSTRIP - tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate '"TCPOPTSTRIP" target support' depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NETFILTER_ADVANCED help @@ -798,6 +805,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 
+config NETFILTER_XT_MATCH_BPF + tristate '"bpf" match support' + depends on NETFILTER_ADVANCED + help + BPF matching applies a Linux socket filter to each packet and + accepts those for which the filter returns non-zero. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK @@ -835,6 +851,19 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLABEL + tristate '"connlabel" match support' + select NF_CONNTRACK_LABELS + depends on NF_CONNTRACK + depends on NETFILTER_ADVANCED + ---help--- + This match allows you to test and assign userspace-defined label names + to a connection. The kernel only stores bit values - mapping + names to bits is done by userspace. + + Unlike connmark, more than 32 flag bits may be assigned to a + connection simultaneously. + config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support"' depends on NF_CONNTRACK @@ -1138,8 +1167,7 @@ config NETFILTER_XT_MATCH_RECENT Official Website: <http://snowman.net/projects/ipt_recent/> config NETFILTER_XT_MATCH_SCTP - tristate '"sctp" protocol match support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate '"sctp" protocol match support' depends on NETFILTER_ADVANCED default IP_SCTP help @@ -1151,8 +1179,7 @@ config NETFILTER_XT_MATCH_SCTP <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_MATCH_SOCKET - tristate '"socket" match support (EXPERIMENTAL)' - depends on EXPERIMENTAL + tristate '"socket" match support' depends on NETFILTER_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3259697..a1abf87 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -4,6 +4,7 @@ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -98,9 +99,11 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLABEL) += xt_connlabel.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 0f92dc2..d7df6ac 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -339,7 +339,11 @@ bitmap_ipmac_tlist(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); ipset_nest_end(skb, atd); - return -EMSGSIZE; + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; } static int diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 6d6d8f2..1ba9dbc 
100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -88,14 +88,14 @@ find_set_type(const char *name, u8 family, u8 revision) static bool load_settype(const char *name) { - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); pr_debug("try to load ip_set_%s\n", name); if (request_module("ip_set_%s", name) < 0) { pr_warning("Can't find ip_set type %s\n", name); - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); return false; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); return true; } @@ -532,7 +532,7 @@ ip_set_nfnl_get(const char *name) ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); for (i = 0; i < ip_set_max; i++) { s = nfnl_set(i); if (s != NULL && STREQ(s->name, name)) { @@ -541,7 +541,7 @@ ip_set_nfnl_get(const char *name) break; } } - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); return index; } @@ -561,13 +561,13 @@ ip_set_nfnl_get_byindex(ip_set_id_t index) if (index > ip_set_max) return IPSET_INVALID_ID; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(index); if (set) __ip_set_get(set); else index = IPSET_INVALID_ID; - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); return index; } @@ -584,11 +584,11 @@ void ip_set_nfnl_put(ip_set_id_t index) { struct ip_set *set; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(index); if (set != NULL) __ip_set_put(set); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -1470,7 +1470,8 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, if (ret == -EAGAIN) ret = 1; - return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST; + return (ret < 0 && ret != -ENOTEMPTY) ? ret : + ret > 0 ? 
0 : -IPSET_ERR_EXIST; } /* Get headed data of a set */ @@ -1763,10 +1764,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) goto done; } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); find_set_and_id(req_get->set.name, &id); req_get->set.index = id; - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } case IP_SET_OP_GET_BYINDEX: { @@ -1778,11 +1779,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EINVAL; goto done; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(req_get->set.index); strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } default: diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 5c0b785..b7d4cb4 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -234,7 +234,7 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1, const struct hash_ip6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0; + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6); } static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 6283351..d8f77ba 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -284,7 +284,7 @@ hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1, const struct hash_ipport6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6a21271..1da1e95 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -294,8 +294,8 
@@ hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, const struct hash_ipportip6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && - ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2d5cd4e..10a30b4 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -104,6 +104,15 @@ hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem) { @@ -388,8 +397,8 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1, const struct hash_ipportnet6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && - ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->cidr == ip2->cidr && ip1->port == ip2->port && ip1->proto == ip2->proto; @@ -414,6 +423,15 @@ hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 
29e94b9..d6a5915 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -87,7 +87,16 @@ hash_net4_data_copy(struct hash_net4_elem *dst, static inline void hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net4_data_reset_flags(struct hash_net4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -286,7 +295,7 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1, const struct hash_net6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr; } @@ -308,7 +317,16 @@ hash_net6_data_copy(struct hash_net6_elem *dst, static inline void hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net6_data_reset_flags(struct hash_net6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 45a1014..f2b0a3c 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -198,7 +198,16 @@ hash_netiface4_data_copy(struct hash_netiface4_elem *dst, static inline void hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netiface4_data_reset_flags(struct hash_netiface4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -471,7 +480,7 @@ hash_netiface6_data_equal(const 
struct hash_netiface6_elem *ip1, const struct hash_netiface6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && @@ -494,7 +503,7 @@ hash_netiface6_data_copy(struct hash_netiface6_elem *dst, static inline void hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } static inline int @@ -504,6 +513,15 @@ hash_netiface6_data_match(const struct hash_netiface6_elem *elem) } static inline void +hash_netiface6_data_reset_flags(struct hash_netiface6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + +static inline void hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) { elem->elem = 0; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7ef700d..349deb6 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -104,6 +104,15 @@ hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_netport4_data_reset_flags(struct hash_netport4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport4_data_match(const struct hash_netport4_elem *elem) { @@ -350,7 +359,7 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1, const struct hash_netport6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto && ip1->cidr == ip2->cidr; @@ -375,6 +384,15 @@ hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags) dst->nomatch = !!(flags & 
IPSET_FLAG_NOMATCH); } +static inline void +hash_netport6_data_reset_flags(struct hash_netport6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport6_data_match(const struct hash_netport6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 8371c2b..09c744a 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -174,9 +174,13 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, { const struct set_elem *e = list_set_elem(map, i); - if (i == map->size - 1 && e->id != IPSET_INVALID_ID) - /* Last element replaced: e.g. add new,before,last */ - ip_set_put_byindex(e->id); + if (e->id != IPSET_INVALID_ID) { + const struct set_elem *x = list_set_elem(map, map->size - 1); + + /* Last element replaced or pushed off */ + if (x->id != IPSET_INVALID_ID) + ip_set_put_byindex(x->id); + } if (with_timeout(map->timeout)) list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); else diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 9713e6e..0b779d7 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -605,12 +605,12 @@ int __net_init ip_vs_app_net_init(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); INIT_LIST_HEAD(&ipvs->app_list); - proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); + proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); return 0; } void __net_exit ip_vs_app_net_cleanup(struct net *net) { unregister_ip_vs_app(net, NULL /* all */); - proc_net_remove(net, "ip_vs_app"); + remove_proc_entry("ip_vs_app", net->proc_net); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 30e764a..704e514 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -259,13 +259,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { unsigned 
int hash; struct ip_vs_conn *cp; - struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && @@ -344,13 +343,12 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp; - struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (!ip_vs_conn_net_eq(cp, p->net)) continue; if (p->pe_data && p->pe->ct_match) { @@ -394,7 +392,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp, *ret=NULL; - struct hlist_node *n; /* * Check for "full" addressed entries @@ -403,7 +400,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) ct_read_lock(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && @@ -796,8 +793,7 @@ static void ip_vs_conn_expire(unsigned long data) */ if (likely(atomic_read(&cp->refcnt) == 1)) { /* delete the timer if it is activated by other users */ - if (timer_pending(&cp->timer)) - del_timer(&cp->timer); + del_timer(&cp->timer); /* does anybody control me? 
*/ if (cp->control) @@ -954,11 +950,10 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) int idx; struct ip_vs_conn *cp; struct ip_vs_iter_state *iter = seq->private; - struct hlist_node *n; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { iter->l = &ip_vs_conn_tab[idx]; return cp; @@ -982,7 +977,6 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; - struct hlist_node *e; struct hlist_head *l = iter->l; int idx; @@ -991,15 +985,15 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ip_vs_conn_array(seq, 0); /* more on same hash chain? */ - if ((e = cp->c_list.next)) - return hlist_entry(e, struct ip_vs_conn, c_list); + if (cp->c_list.next) + return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list); idx = l - ip_vs_conn_tab; ct_read_unlock_bh(idx); while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); - hlist_for_each_entry(cp, e, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { iter->l = &ip_vs_conn_tab[idx]; return cp; } @@ -1201,14 +1195,13 @@ void ip_vs_random_dropentry(struct net *net) */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { unsigned int hash = net_random() & ip_vs_conn_tab_mask; - struct hlist_node *n; /* * Lock is actually needed in this loop. */ ct_write_lock_bh(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; @@ -1256,14 +1249,12 @@ static void ip_vs_conn_flush(struct net *net) flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - struct hlist_node *n; - /* * Lock is actually needed in this loop. 
*/ ct_write_lock_bh(idx); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) continue; IP_VS_DBG(4, "del connection\n"); @@ -1292,8 +1283,8 @@ int __net_init ip_vs_conn_net_init(struct net *net) atomic_set(&ipvs->conn_count, 0); - proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); - proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); + proc_create("ip_vs_conn", 0, net->proc_net, &ip_vs_conn_fops); + proc_create("ip_vs_conn_sync", 0, net->proc_net, &ip_vs_conn_sync_fops); return 0; } @@ -1301,8 +1292,8 @@ void __net_exit ip_vs_conn_net_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); - proc_net_remove(net, "ip_vs_conn"); - proc_net_remove(net, "ip_vs_conn_sync"); + remove_proc_entry("ip_vs_conn", net->proc_net); + remove_proc_entry("ip_vs_conn_sync", net->proc_net); } int __init ip_vs_conn_init(void) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 47edf5a..61f49d2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1394,10 +1394,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - rcu_read_lock(); ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); - rcu_read_unlock(); /* Client uses PMTUD? */ if (!(cih->frag_off & htons(IP_DF))) goto ignore_ipip; @@ -1577,7 +1575,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } /* ipvs enabled in this netns ? 
*/ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; ip_vs_fill_iph_skb(af, skb, &iph); @@ -1654,7 +1653,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1815,13 +1813,15 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp(skb, &r, hooknum); @@ -1835,6 +1835,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; struct ip_vs_iphdr iphdr; ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); @@ -1843,7 +1844,8 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, /* ipvs enabled in this netns ? 
*/ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ec664cb..9e2d1cc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1808,6 +1808,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "backup_only", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3741,6 +3747,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; ipvs->sysctl_pmtu_disc = 1; tbl[idx++].data = &ipvs->sysctl_pmtu_disc; + tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); @@ -3800,10 +3807,10 @@ int __net_init ip_vs_control_net_init(struct net *net) spin_lock_init(&ipvs->tot_stats.lock); - proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); - proc_net_fops_create(net, "ip_vs_stats_percpu", 0, - &ip_vs_stats_percpu_fops); + proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); + proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops); + proc_create("ip_vs_stats_percpu", 0, net->proc_net, + &ip_vs_stats_percpu_fops); if (ip_vs_control_net_init_sysctl(net)) goto err; @@ -3822,9 +3829,9 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); - proc_net_remove(net, "ip_vs_stats_percpu"); - proc_net_remove(net, "ip_vs_stats"); - proc_net_remove(net, "ip_vs"); + remove_proc_entry("ip_vs_stats_percpu", net->proc_net); + remove_proc_entry("ip_vs_stats", net->proc_net); + 
remove_proc_entry("ip_vs", net->proc_net); free_percpu(ipvs->tot_stats.cpustats); } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 746048b..cd1d729 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -61,14 +61,27 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 1; } +static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, + unsigned int sctphoff) +{ + __u32 crc32; + struct sk_buff *iter; + + crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff); + skb_walk_frags(skb, iter) + crc32 = sctp_update_cksum((u8 *) iter->data, + skb_headlen(iter), crc32); + sctph->checksum = sctp_end_cksum(crc32); + + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + static int sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; - struct sk_buff *iter; - __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -92,13 +105,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, sctph = (void *) skb_network_header(skb) + sctphoff; sctph->source = cp->vport; - /* Calculate the checksum */ - crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); - skb_walk_frags(skb, iter) - crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter), - crc32); - crc32 = sctp_end_cksum(crc32); - sctph->checksum = crc32; + sctp_nat_csum(skb, sctph, sctphoff); return 1; } @@ -109,8 +116,6 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; - struct sk_buff *iter; - __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -134,13 +139,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, sctph = (void *) skb_network_header(skb) + sctphoff; sctph->dest = cp->dport; - 
/* Calculate the checksum */ - crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); - skb_walk_frags(skb, iter) - crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter), - crc32); - crc32 = sctp_end_cksum(crc32); - sctph->checksum = crc32; + sctp_nat_csum(skb, sctph, sctphoff); return 1; } @@ -907,7 +906,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; - int ihl; + int ihl, cofs; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -915,8 +914,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ihl = ip_hdrlen(skb); #endif - sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), - sizeof(_sctpch), &_sctpch); + cofs = ihl + sizeof(sctp_sctphdr_t); + sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); if (sch == NULL) return; @@ -934,10 +933,12 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, */ if ((sch->type == SCTP_CID_COOKIE_ECHO) || (sch->type == SCTP_CID_COOKIE_ACK)) { - sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + - sch->length), sizeof(_sctpch), &_sctpch); - if (sch) { - if (sch->type == SCTP_CID_ABORT) + int clen = ntohs(sch->length); + + if (clen >= sizeof(sctp_chunkhdr_t)) { + sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), + sizeof(_sctpch), &_sctpch); + if (sch && sch->type == SCTP_CID_ABORT) chunk_type = sch->type; } } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index effa10c..44fd10c 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1795,6 +1795,8 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) GFP_KERNEL); if (!tinfo->buf) goto outtinfo; + } else { + tinfo->buf = NULL; } tinfo->id = id; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 7df424e..2d3030a 100644 --- 
a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -106,36 +106,26 @@ static void nf_conntrack_acct_fini_sysctl(struct net *net) } #endif -int nf_conntrack_acct_init(struct net *net) +int nf_conntrack_acct_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_acct = nf_ct_acct; + return nf_conntrack_acct_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&acct_extend); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); - goto out_extend_register; - } - } +void nf_conntrack_acct_pernet_fini(struct net *net) +{ + nf_conntrack_acct_fini_sysctl(net); +} - ret = nf_conntrack_acct_init_sysctl(net); +int nf_conntrack_acct_init(void) +{ + int ret = nf_ct_extend_register(&acct_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&acct_extend); -out_extend_register: + pr_err("nf_conntrack_acct: Unable to register extension\n"); return ret; } -void nf_conntrack_acct_fini(struct net *net) +void nf_conntrack_acct_fini(void) { - nf_conntrack_acct_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&acct_extend); + nf_ct_extend_unregister(&acct_extend); } diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index c514fe6..dbdaa11 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -145,6 +145,7 @@ static int amanda_help(struct sk_buff *skb, exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -158,8 +159,10 @@ static int amanda_help(struct sk_buff *skb, if (nf_nat_amanda && ct->status & IPS_NAT_MASK) ret = nf_nat_amanda(skb, ctinfo, protoff, off - dataoff, len, exp); - else if (nf_ct_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; + } 
nf_ct_expect_put(exp); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 08cdc71..c8e001a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -45,6 +45,7 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_timeout.h> +#include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> @@ -763,6 +764,7 @@ void nf_conntrack_free(struct nf_conn *ct) } EXPORT_SYMBOL_GPL(nf_conntrack_free); + /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * @@ -809,6 +811,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, @@ -1331,18 +1334,42 @@ static int untrack_refs(void) return cnt; } -static void nf_conntrack_cleanup_init_net(void) +void nf_conntrack_cleanup_start(void) { + RCU_INIT_POINTER(ip_ct_attach, NULL); +} + +void nf_conntrack_cleanup_end(void) +{ + RCU_INIT_POINTER(nf_ct_destroy, NULL); while (untrack_refs() > 0) schedule(); #ifdef CONFIG_NF_CONNTRACK_ZONES nf_ct_extend_unregister(&nf_ct_zone_extend); #endif + nf_conntrack_proto_fini(); + nf_conntrack_labels_fini(); + nf_conntrack_helper_fini(); + nf_conntrack_timeout_fini(); + nf_conntrack_ecache_fini(); + nf_conntrack_tstamp_fini(); + nf_conntrack_acct_fini(); + nf_conntrack_expect_fini(); } -static void nf_conntrack_cleanup_net(struct net *net) +/* + * Mishearing the voices in his head, our hero wonders how he's + * supposed to kill the mall. 
+ */ +void nf_conntrack_cleanup_net(struct net *net) { + /* + * This makes sure all current packets have passed through + * netfilter framework. Roll on, two-stage module + * delete... + */ + synchronize_net(); i_see_dead_people: nf_ct_iterate_cleanup(net, kill_all, NULL); nf_ct_release_dying_list(net); @@ -1352,37 +1379,17 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); - nf_conntrack_helper_fini(net); - nf_conntrack_timeout_fini(net); - nf_conntrack_ecache_fini(net); - nf_conntrack_tstamp_fini(net); - nf_conntrack_acct_fini(net); - nf_conntrack_expect_fini(net); + nf_conntrack_proto_pernet_fini(net); + nf_conntrack_helper_pernet_fini(net); + nf_conntrack_ecache_pernet_fini(net); + nf_conntrack_tstamp_pernet_fini(net); + nf_conntrack_acct_pernet_fini(net); + nf_conntrack_expect_pernet_fini(net); kmem_cache_destroy(net->ct.nf_conntrack_cachep); kfree(net->ct.slabname); free_percpu(net->ct.stat); } -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void nf_conntrack_cleanup(struct net *net) -{ - if (net_eq(net, &init_net)) - RCU_INIT_POINTER(ip_ct_attach, NULL); - - /* This makes sure all current packets have passed through - netfilter framework. Roll on, two-stage module - delete... 
*/ - synchronize_net(); - nf_conntrack_proto_fini(net); - nf_conntrack_cleanup_net(net); - - if (net_eq(net, &init_net)) { - RCU_INIT_POINTER(nf_ct_destroy, NULL); - nf_conntrack_cleanup_init_net(); - } -} - void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) { struct hlist_nulls_head *hash; @@ -1473,7 +1480,7 @@ void nf_ct_untracked_status_or(unsigned long bits) } EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); -static int nf_conntrack_init_init_net(void) +int nf_conntrack_init_start(void) { int max_factor = 8; int ret, cpu; @@ -1500,11 +1507,44 @@ static int nf_conntrack_init_init_net(void) printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); + + ret = nf_conntrack_expect_init(); + if (ret < 0) + goto err_expect; + + ret = nf_conntrack_acct_init(); + if (ret < 0) + goto err_acct; + + ret = nf_conntrack_tstamp_init(); + if (ret < 0) + goto err_tstamp; + + ret = nf_conntrack_ecache_init(); + if (ret < 0) + goto err_ecache; + + ret = nf_conntrack_timeout_init(); + if (ret < 0) + goto err_timeout; + + ret = nf_conntrack_helper_init(); + if (ret < 0) + goto err_helper; + + ret = nf_conntrack_labels_init(); + if (ret < 0) + goto err_labels; + #ifdef CONFIG_NF_CONNTRACK_ZONES ret = nf_ct_extend_register(&nf_ct_zone_extend); if (ret < 0) goto err_extend; #endif + ret = nf_conntrack_proto_init(); + if (ret < 0) + goto err_proto; + /* Set up fake conntrack: to never be deleted, not in any hashes */ for_each_possible_cpu(cpu) { struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); @@ -1515,25 +1555,53 @@ static int nf_conntrack_init_init_net(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); return 0; +err_proto: #ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_extend_unregister(&nf_ct_zone_extend); err_extend: #endif + nf_conntrack_labels_fini(); +err_labels: + nf_conntrack_helper_fini(); +err_helper: + nf_conntrack_timeout_fini(); +err_timeout: + nf_conntrack_ecache_fini(); 
+err_ecache: + nf_conntrack_tstamp_fini(); +err_tstamp: + nf_conntrack_acct_fini(); +err_acct: + nf_conntrack_expect_fini(); +err_expect: return ret; } +void nf_conntrack_init_end(void) +{ + /* For use by REJECT target */ + RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); + RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); + + /* Howto get NAT offsets */ + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); +} + /* * We need to use special "null" values, not used in hash table */ #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) #define DYING_NULLS_VAL ((1<<30)+1) +#define TEMPLATE_NULLS_VAL ((1<<30)+2) -static int nf_conntrack_init_net(struct net *net) +int nf_conntrack_init_net(struct net *net) { int ret; atomic_set(&net->ct.count, 0); INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); + INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) { ret = -ENOMEM; @@ -1562,35 +1630,36 @@ static int nf_conntrack_init_net(struct net *net) printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_hash; } - ret = nf_conntrack_expect_init(net); + ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; - ret = nf_conntrack_acct_init(net); + ret = nf_conntrack_acct_pernet_init(net); if (ret < 0) goto err_acct; - ret = nf_conntrack_tstamp_init(net); + ret = nf_conntrack_tstamp_pernet_init(net); if (ret < 0) goto err_tstamp; - ret = nf_conntrack_ecache_init(net); + ret = nf_conntrack_ecache_pernet_init(net); if (ret < 0) goto err_ecache; - ret = nf_conntrack_timeout_init(net); - if (ret < 0) - goto err_timeout; - ret = nf_conntrack_helper_init(net); + ret = nf_conntrack_helper_pernet_init(net); if (ret < 0) goto err_helper; + ret = nf_conntrack_proto_pernet_init(net); + if (ret < 0) + goto err_proto; return 0; + +err_proto: + nf_conntrack_helper_pernet_fini(net); err_helper: - nf_conntrack_timeout_fini(net); 
-err_timeout: - nf_conntrack_ecache_fini(net); + nf_conntrack_ecache_pernet_fini(net); err_ecache: - nf_conntrack_tstamp_fini(net); + nf_conntrack_tstamp_pernet_fini(net); err_tstamp: - nf_conntrack_acct_fini(net); + nf_conntrack_acct_pernet_fini(net); err_acct: - nf_conntrack_expect_fini(net); + nf_conntrack_expect_pernet_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); err_hash: @@ -1607,38 +1676,3 @@ s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); EXPORT_SYMBOL_GPL(nf_ct_nat_offset); - -int nf_conntrack_init(struct net *net) -{ - int ret; - - if (net_eq(net, &init_net)) { - ret = nf_conntrack_init_init_net(); - if (ret < 0) - goto out_init_net; - } - ret = nf_conntrack_proto_init(net); - if (ret < 0) - goto out_proto; - ret = nf_conntrack_init_net(net); - if (ret < 0) - goto out_net; - - if (net_eq(net, &init_net)) { - /* For use by REJECT target */ - RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); - RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); - - /* Howto get NAT offsets */ - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); - } - return 0; - -out_net: - nf_conntrack_proto_fini(net); -out_proto: - if (net_eq(net, &init_net)) - nf_conntrack_cleanup_init_net(); -out_init_net: - return ret; -} diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index faa978f..b5d2eb8 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -233,38 +233,27 @@ static void nf_conntrack_event_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ -int nf_conntrack_ecache_init(struct net *net) +int nf_conntrack_ecache_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_events = nf_ct_events; net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout; + return nf_conntrack_event_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&event_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_event: 
Unable to register " - "event extension.\n"); - goto out_extend_register; - } - } +void nf_conntrack_ecache_pernet_fini(struct net *net) +{ + nf_conntrack_event_fini_sysctl(net); +} - ret = nf_conntrack_event_init_sysctl(net); +int nf_conntrack_ecache_init(void) +{ + int ret = nf_ct_extend_register(&event_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&event_extend); -out_extend_register: + pr_err("nf_ct_event: Unable to register event extension.\n"); return ret; } -void nf_conntrack_ecache_fini(struct net *net) +void nf_conntrack_ecache_fini(void) { - nf_conntrack_event_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&event_extend); + nf_ct_extend_unregister(&event_extend); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 527651a..8c10e3d 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -90,14 +90,13 @@ __nf_ct_expect_find(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; - struct hlist_node *n; unsigned int h; if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && nf_ct_zone(i->master) == zone) return i; @@ -130,14 +129,13 @@ nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; - struct hlist_node *n; unsigned int h; if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && nf_ct_zone(i->master) == zone) { @@ 
-172,13 +170,13 @@ void nf_ct_remove_expectations(struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; /* Optimization: most connection never expect any others. */ if (!help) return; - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); @@ -348,9 +346,8 @@ static void evict_oldest_expect(struct nf_conn *master, { struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_expect *exp, *last = NULL; - struct hlist_node *n; - hlist_for_each_entry(exp, n, &master_help->expectations, lnode) { + hlist_for_each_entry(exp, &master_help->expectations, lnode) { if (exp->class == new->class) last = exp; } @@ -369,7 +366,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int h; int ret = 1; @@ -378,7 +375,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { if (del_timer(&i->timeout)) { nf_ct_unlink_expect(i); @@ -571,7 +568,8 @@ static int exp_proc_init(struct net *net) #ifdef CONFIG_NF_CONNTRACK_PROCFS struct proc_dir_entry *proc; - proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops); + proc = proc_create("nf_conntrack_expect", 0440, net->proc_net, + &exp_file_ops); if (!proc) return -ENOMEM; #endif /* CONFIG_NF_CONNTRACK_PROCFS */ @@ -581,59 +579,56 @@ static int exp_proc_init(struct net 
*net) static void exp_proc_remove(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_PROCFS - proc_net_remove(net, "nf_conntrack_expect"); + remove_proc_entry("nf_conntrack_expect", net->proc_net); #endif /* CONFIG_NF_CONNTRACK_PROCFS */ } module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); -int nf_conntrack_expect_init(struct net *net) +int nf_conntrack_expect_pernet_init(struct net *net) { int err = -ENOMEM; - if (net_eq(net, &init_net)) { - if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = net->ct.htable_size / 256; - if (!nf_ct_expect_hsize) - nf_ct_expect_hsize = 1; - } - nf_ct_expect_max = nf_ct_expect_hsize * 4; - } - net->ct.expect_count = 0; net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); if (net->ct.expect_hash == NULL) goto err1; - if (net_eq(net, &init_net)) { - nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", - sizeof(struct nf_conntrack_expect), - 0, 0, NULL); - if (!nf_ct_expect_cachep) - goto err2; - } - err = exp_proc_init(net); if (err < 0) - goto err3; + goto err2; return 0; - -err3: - if (net_eq(net, &init_net)) - kmem_cache_destroy(nf_ct_expect_cachep); err2: nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); err1: return err; } -void nf_conntrack_expect_fini(struct net *net) +void nf_conntrack_expect_pernet_fini(struct net *net) { exp_proc_remove(net); - if (net_eq(net, &init_net)) { - rcu_barrier(); /* Wait for call_rcu() before destroy */ - kmem_cache_destroy(nf_ct_expect_cachep); - } nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); } + +int nf_conntrack_expect_init(void) +{ + if (!nf_ct_expect_hsize) { + nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + if (!nf_ct_expect_hsize) + nf_ct_expect_hsize = 1; + } + nf_ct_expect_max = nf_ct_expect_hsize * 4; + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", + sizeof(struct nf_conntrack_expect), + 0, 0, NULL); + if (!nf_ct_expect_cachep) + return -ENOMEM; + return 0; +} + +void 
nf_conntrack_expect_fini(void) +{ + rcu_barrier(); /* Wait for call_rcu() before destroy */ + kmem_cache_destroy(nf_ct_expect_cachep); +} diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 1ce3bef..62fb8fa 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -435,8 +435,8 @@ skip_nl_seq: connection tracking, not packet filtering. However, it is necessary for accurate tracking in this case. */ - pr_debug("conntrack_ftp: partial %s %u+%u\n", - search[dir][i].pattern, ntohl(th->seq), datalen); + nf_ct_helper_log(skb, ct, "partial matching of `%s'", + search[dir][i].pattern); ret = NF_DROP; goto out; } else if (found == 0) { /* No match */ @@ -450,6 +450,7 @@ skip_nl_seq: exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -500,9 +501,10 @@ skip_nl_seq: protoff, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. 
*/ - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; - else + } else ret = NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 962795e..7df7b36 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -623,7 +623,7 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - net_info_ratelimited("nf_ct_h245: packet dropped\n"); + nf_ct_helper_log(skb, ct, "cannot process H.245 message"); return NF_DROP; } @@ -1197,7 +1197,7 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - net_info_ratelimited("nf_ct_q931: packet dropped\n"); + nf_ct_helper_log(skb, ct, "cannot process Q.931 message"); return NF_DROP; } @@ -1795,7 +1795,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - net_info_ratelimited("nf_ct_ras: packet dropped\n"); + nf_ct_helper_log(skb, ct, "cannot process RAS message"); return NF_DROP; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 884f2b3..94b4b98 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -28,6 +28,7 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_log.h> static DEFINE_MUTEX(nf_ct_helper_mutex); struct hlist_head *nf_ct_helper_hash __read_mostly; @@ -115,14 +116,13 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; - struct hlist_node *n; unsigned int h; if (!nf_ct_helper_count) return NULL; h = helper_hash(tuple); - hlist_for_each_entry_rcu(helper, n, 
&nf_ct_helper_hash[h], hnode) { + hlist_for_each_entry_rcu(helper, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask)) return helper; } @@ -133,11 +133,10 @@ struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; - struct hlist_node *n; unsigned int i; for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { if (!strcmp(h->name, name) && h->tuple.src.l3num == l3num && h->tuple.dst.protonum == protonum) @@ -236,7 +235,9 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. */ - if (help->helper != helper) { + struct nf_conntrack_helper *tmp = rcu_dereference(help->helper); + + if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); goto out; } @@ -332,11 +333,37 @@ nf_ct_helper_expectfn_find_by_symbol(const void *symbol) } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); +__printf(3, 4) +void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, + const char *fmt, ...) 
+{ + const struct nf_conn_help *help; + const struct nf_conntrack_helper *helper; + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + /* Called from the helper function, this call never fails */ + help = nfct_help(ct); + + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + + nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, + "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); + + va_end(args); +} +EXPORT_SYMBOL_GPL(nf_ct_helper_log); + int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { int ret = 0; struct nf_conntrack_helper *cur; - struct hlist_node *n; unsigned int h = helper_hash(&me->tuple); BUG_ON(me->expect_policy == NULL); @@ -344,7 +371,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); mutex_lock(&nf_ct_helper_mutex); - hlist_for_each_entry(cur, n, &nf_ct_helper_hash[h], hnode) { + hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && cur->tuple.src.l3num == me->tuple.src.l3num && cur->tuple.dst.protonum == me->tuple.dst.protonum) { @@ -365,13 +392,13 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; - const struct hlist_node *n, *next; + const struct hlist_node *next; const struct hlist_nulls_node *nn; unsigned int i; /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((rcu_dereference_protected( @@ -423,44 +450,41 @@ static struct nf_ct_ext_type helper_extend __read_mostly = { .id = NF_CT_EXT_HELPER, }; -int nf_conntrack_helper_init(struct net *net) +int nf_conntrack_helper_pernet_init(struct net *net) { - int err; - 
net->ct.auto_assign_helper_warned = false; net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper; + return nf_conntrack_helper_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ - nf_ct_helper_hash = - nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); - if (!nf_ct_helper_hash) - return -ENOMEM; +void nf_conntrack_helper_pernet_fini(struct net *net) +{ + nf_conntrack_helper_fini_sysctl(net); +} - err = nf_ct_extend_register(&helper_extend); - if (err < 0) - goto err1; +int nf_conntrack_helper_init(void) +{ + int ret; + nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ + nf_ct_helper_hash = + nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); + if (!nf_ct_helper_hash) + return -ENOMEM; + + ret = nf_ct_extend_register(&helper_extend); + if (ret < 0) { + pr_err("nf_ct_helper: Unable to register helper extension.\n"); + goto out_extend; } - err = nf_conntrack_helper_init_sysctl(net); - if (err < 0) - goto out_sysctl; - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&helper_extend); -err1: +out_extend: nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); - return err; + return ret; } -void nf_conntrack_helper_fini(struct net *net) +void nf_conntrack_helper_fini(void) { - nf_conntrack_helper_fini_sysctl(net); - if (net_eq(net, &init_net)) { - nf_ct_extend_unregister(&helper_extend); - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); - } + nf_ct_extend_unregister(&helper_extend); + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 3b20aa7..70985c5 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -194,6 +194,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, + "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ 
-210,8 +212,11 @@ static int help(struct sk_buff *skb, unsigned int protoff, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); - else if (nf_ct_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, + "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); goto out; } diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c new file mode 100644 index 0000000..8fe2e99 --- /dev/null +++ b/net/netfilter/nf_conntrack_labels.c @@ -0,0 +1,112 @@ +/* + * test/set flag bits stored in conntrack extension area. + * + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ctype.h> +#include <linux/export.h> +#include <linux/jhash.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/slab.h> + +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_labels.h> + +static unsigned int label_bits(const struct nf_conn_labels *l) +{ + unsigned int longs = l->words; + return longs * BITS_PER_LONG; +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return false; + + return bit < label_bits(labels) && test_bit(bit, labels->bits); +} +EXPORT_SYMBOL_GPL(nf_connlabel_match); + +int nf_connlabel_set(struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels || bit >= label_bits(labels)) + return -ENOSPC; + + if (test_bit(bit, labels->bits)) + return 0; + + if (test_and_set_bit(bit, labels->bits)) + nf_conntrack_event_cache(IPCT_LABEL, ct); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabel_set); + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) +static void replace_u32(u32 *address, u32 mask, u32 new) +{ + u32 old, tmp; + + do { + old = 
*address; + tmp = (old & mask) ^ new; + } while (cmpxchg(address, old, tmp) != old); +} + +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, + const u32 *mask, unsigned int words32) +{ + struct nf_conn_labels *labels; + unsigned int size, i; + u32 *dst; + + labels = nf_ct_labels_find(ct); + if (!labels) + return -ENOSPC; + + size = labels->words * sizeof(long); + if (size < (words32 * sizeof(u32))) + words32 = size / sizeof(u32); + + dst = (u32 *) labels->bits; + if (words32) { + for (i = 0; i < words32; i++) + replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); + } + + size /= sizeof(u32); + for (i = words32; i < size; i++) /* pad */ + replace_u32(&dst[i], 0, 0); + + nf_conntrack_event_cache(IPCT_LABEL, ct); + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabels_replace); +#endif + +static struct nf_ct_ext_type labels_extend __read_mostly = { + .len = sizeof(struct nf_conn_labels), + .align = __alignof__(struct nf_conn_labels), + .id = NF_CT_EXT_LABELS, +}; + +int nf_conntrack_labels_init(void) +{ + return nf_ct_extend_register(&labels_extend); +} + +void nf_conntrack_labels_fini(void) +{ + nf_ct_extend_unregister(&labels_extend); +} diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4e078cd..9904b15 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -43,6 +43,7 @@ #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> +#include <net/netfilter/nf_conntrack_labels.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_l4proto.h> @@ -323,6 +324,40 @@ nla_put_failure: #define ctnetlink_dump_secctx(a, b) (0) #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS +static int ctnetlink_label_size(const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return 0; + return nla_total_size(labels->words * 
sizeof(long)); +} + +static int +ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + unsigned int len, i; + + if (!labels) + return 0; + + len = labels->words * sizeof(long); + i = 0; + do { + if (labels->bits[i] != 0) + return nla_put(skb, CTA_LABELS, len, labels->bits); + i++; + } while (i < labels->words); + + return 0; +} +#else +#define ctnetlink_dump_labels(a, b) (0) +#define ctnetlink_label_size(a) (0) +#endif + #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) static inline int @@ -463,6 +498,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || + ctnetlink_dump_labels(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || @@ -561,6 +597,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif + ctnetlink_proto_size(ct) + + ctnetlink_label_size(ct) ; } @@ -662,6 +699,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) && ctnetlink_dump_secctx(skb, ct) < 0) goto nla_put_failure; #endif + if (events & (1 << IPCT_LABEL) && + ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; if (events & (1 << IPCT_RELATED) && ctnetlink_dump_master(skb, ct) < 0) @@ -921,6 +961,7 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, return 0; } +#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, @@ -937,6 +978,10 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED }, [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, + 
[CTA_LABELS] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, + [CTA_LABELS_MASK] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, }; static int @@ -1211,13 +1256,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (!parse_nat_setup) { #ifdef CONFIG_MODULES rcu_read_unlock(); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat") < 0) { - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); if (nfnetlink_parse_nat_setup_hook) return -EAGAIN; @@ -1229,13 +1274,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (err == -EAGAIN) { #ifdef CONFIG_MODULES rcu_read_unlock(); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) { - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); #else err = -EOPNOTSUPP; @@ -1465,6 +1510,31 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, #endif static int +ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + size_t len = nla_len(cda[CTA_LABELS]); + const void *mask = cda[CTA_LABELS_MASK]; + + if (len & (sizeof(u32)-1)) /* must be multiple of u32 */ + return -EINVAL; + + if (mask) { + if (nla_len(cda[CTA_LABELS_MASK]) == 0 || + nla_len(cda[CTA_LABELS_MASK]) != len) + return -EINVAL; + mask = nla_data(cda[CTA_LABELS_MASK]); + } + + len /= sizeof(u32); + + return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len); +#else + return -EOPNOTSUPP; +#endif +} + +static int ctnetlink_change_conntrack(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1510,6 +1580,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct, return err; } #endif + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } return 
0; } @@ -1598,6 +1673,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); + /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; @@ -1705,6 +1782,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_CREATE) { enum ip_conntrack_events events; + if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) + return -EINVAL; + ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) @@ -1716,6 +1796,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; + if (cda[CTA_LABELS] && + ctnetlink_attach_labels(ct, cda) == 0) + events |= (1 << IPCT_LABEL); + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -1983,6 +2067,8 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif + if (ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; rcu_read_unlock(); return 0; @@ -2011,6 +2097,11 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; } + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); @@ -2279,14 +2370,13 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - struct hlist_node *n; u_int8_t l3proto = nfmsg->nfgen_family; rcu_read_lock(); last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, n, 
&net->ct.expect_hash[cb->args[0]], + hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -2419,7 +2509,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct hlist_node *n, *next; + struct hlist_node *next; u_int8_t u3 = nfmsg->nfgen_family; unsigned int i; u16 zone; @@ -2466,7 +2556,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* delete all expectations for this helper */ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); @@ -2484,7 +2574,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* This basically means we have to flush everything*/ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { @@ -2624,7 +2714,7 @@ ctnetlink_create_expect(struct net *net, u16 zone, if (!help) { if (!cda[CTA_EXPECT_TIMEOUT]) { err = -EINVAL; - goto out; + goto err_out; } exp->timeout.expires = jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ; diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index cc7669e..e6678d2 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -14,7 +14,7 @@ * Limitations: * - We blindly assume that control connections are always * established in PNS->PAC direction. 
This is a violation - * of RFFC2673 + * of RFC 2637 * - We can only support one single call within each session * TODO: * - testing of incoming PPTP calls diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 51e928d..58ab405 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -212,8 +212,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct net *net, #endif } -static int -nf_conntrack_l3proto_register_net(struct nf_conntrack_l3proto *proto) +int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) { int ret = 0; struct nf_conntrack_l3proto *old; @@ -242,8 +241,9 @@ out_unlock: return ret; } +EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); -int nf_conntrack_l3proto_register(struct net *net, +int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { int ret = 0; @@ -254,22 +254,11 @@ int nf_conntrack_l3proto_register(struct net *net, return ret; } - ret = nf_ct_l3proto_register_sysctl(net, proto); - if (ret < 0) - return ret; - - if (net == &init_net) { - ret = nf_conntrack_l3proto_register_net(proto); - if (ret < 0) - nf_ct_l3proto_unregister_sysctl(net, proto); - } - - return ret; + return nf_ct_l3proto_register_sysctl(net, proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); -static void -nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) +void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { BUG_ON(proto->l3proto >= AF_MAX); @@ -283,19 +272,17 @@ nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); -void nf_conntrack_l3proto_unregister(struct net *net, +void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { - if (net == &init_net) - nf_conntrack_l3proto_unregister_net(proto); - nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all 
contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) @@ -376,8 +363,7 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -static int -nf_conntrack_l4proto_register_net(struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -431,8 +417,9 @@ out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); -int nf_conntrack_l4proto_register(struct net *net, +int nf_ct_l4proto_pernet_register(struct net *net, struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -452,22 +439,13 @@ int nf_conntrack_l4proto_register(struct net *net, if (ret < 0) goto out; - if (net == &init_net) { - ret = nf_conntrack_l4proto_register_net(l4proto); - if (ret < 0) { - nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); - goto out; - } - } - pn->users++; out: return ret; } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); -static void -nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) { BUG_ON(l4proto->l3proto >= PF_MAX); @@ -482,15 +460,13 @@ nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); -void nf_conntrack_l4proto_unregister(struct net *net, +void nf_ct_l4proto_pernet_unregister(struct net *net, struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = NULL; - if (net == &init_net) - nf_conntrack_l4proto_unregister_net(l4proto); - pn = nf_ct_l4proto_net(net, l4proto); if (pn == NULL) return; @@ -501,11 +477,10 @@ void 
nf_conntrack_l4proto_unregister(struct net *net, /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); -int nf_conntrack_proto_init(struct net *net) +int nf_conntrack_proto_pernet_init(struct net *net) { - unsigned int i; int err; struct nf_proto_net *pn = nf_ct_l4proto_net(net, &nf_conntrack_l4proto_generic); @@ -520,19 +495,12 @@ int nf_conntrack_proto_init(struct net *net) if (err < 0) return err; - if (net == &init_net) { - for (i = 0; i < AF_MAX; i++) - rcu_assign_pointer(nf_ct_l3protos[i], - &nf_conntrack_l3proto_generic); - } - pn->users++; return 0; } -void nf_conntrack_proto_fini(struct net *net) +void nf_conntrack_proto_pernet_fini(struct net *net) { - unsigned int i; struct nf_proto_net *pn = nf_ct_l4proto_net(net, &nf_conntrack_l4proto_generic); @@ -540,9 +508,21 @@ void nf_conntrack_proto_fini(struct net *net) nf_ct_l4proto_unregister_sysctl(net, pn, &nf_conntrack_l4proto_generic); - if (net == &init_net) { - /* free l3proto protocol tables */ - for (i = 0; i < PF_MAX; i++) - kfree(nf_ct_protos[i]); - } +} + +int nf_conntrack_proto_init(void) +{ + unsigned int i; + for (i = 0; i < AF_MAX; i++) + rcu_assign_pointer(nf_ct_l3protos[i], + &nf_conntrack_l3proto_generic); + return 0; +} + +void nf_conntrack_proto_fini(void) +{ + unsigned int i; + /* free l3proto protocol tables */ + for (i = 0; i < PF_MAX; i++) + kfree(nf_ct_protos[i]); } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a8ae287..ba65b20 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -935,32 +935,27 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { static __net_init int dccp_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &dccp_proto4); + ret = nf_ct_l4proto_pernet_register(net, 
&dccp_proto4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_dccp4 :protocol register failed.\n"); + pr_err("nf_conntrack_dccp4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &dccp_proto6); + ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_dccp6 :protocol register failed.\n"); + pr_err("nf_conntrack_dccp6: pernet registration failed.\n"); goto cleanup_dccp4; } return 0; cleanup_dccp4: - nf_conntrack_l4proto_unregister(net, - &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); out: return ret; } static __net_exit void dccp_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, - &dccp_proto6); - nf_conntrack_l4proto_unregister(net, - &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto6); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); } static struct pernet_operations dccp_net_ops = { @@ -972,11 +967,33 @@ static struct pernet_operations dccp_net_ops = { static int __init nf_conntrack_proto_dccp_init(void) { - return register_pernet_subsys(&dccp_net_ops); + int ret; + + ret = register_pernet_subsys(&dccp_net_ops); + if (ret < 0) + goto out_pernet; + + ret = nf_ct_l4proto_register(&dccp_proto4); + if (ret < 0) + goto out_dccp4; + + ret = nf_ct_l4proto_register(&dccp_proto6); + if (ret < 0) + goto out_dccp6; + + return 0; +out_dccp6: + nf_ct_l4proto_unregister(&dccp_proto4); +out_dccp4: + unregister_pernet_subsys(&dccp_net_ops); +out_pernet: + return ret; } static void __exit nf_conntrack_proto_dccp_fini(void) { + nf_ct_l4proto_unregister(&dccp_proto6); + nf_ct_l4proto_unregister(&dccp_proto4); unregister_pernet_subsys(&dccp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index b09b7af..155ce9f 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -397,15 +397,15 @@ static struct nf_conntrack_l4proto 
nf_conntrack_l4proto_gre4 __read_mostly = { static int proto_gre_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, &nf_conntrack_l4proto_gre4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4); if (ret < 0) - pr_err("nf_conntrack_l4proto_gre4 :protocol register failed.\n"); + pr_err("nf_conntrack_gre4: pernet registration failed.\n"); return ret; } static void proto_gre_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_gre4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); nf_ct_gre_keymap_flush(net); } @@ -418,11 +418,26 @@ static struct pernet_operations proto_gre_net_ops = { static int __init nf_ct_proto_gre_init(void) { - return register_pernet_subsys(&proto_gre_net_ops); + int ret; + + ret = register_pernet_subsys(&proto_gre_net_ops); + if (ret < 0) + goto out_pernet; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + if (ret < 0) + goto out_gre4; + + return 0; +out_gre4: + unregister_pernet_subsys(&proto_gre_net_ops); +out_pernet: + return ret; } static void __exit nf_ct_proto_gre_fini(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); unregister_pernet_subsys(&proto_gre_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c746d61..ec83536 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -853,33 +853,28 @@ static int sctp_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_sctp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_sctp4 :protocol register failed.\n"); + pr_err("nf_conntrack_sctp4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_sctp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6); 
if (ret < 0) { - pr_err("nf_conntrack_l4proto_sctp6 :protocol register failed.\n"); + pr_err("nf_conntrack_sctp6: pernet registration failed.\n"); goto cleanup_sctp4; } return 0; cleanup_sctp4: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); out: return ret; } static void sctp_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); } static struct pernet_operations sctp_net_ops = { @@ -891,11 +886,33 @@ static struct pernet_operations sctp_net_ops = { static int __init nf_conntrack_proto_sctp_init(void) { - return register_pernet_subsys(&sctp_net_ops); + int ret; + + ret = register_pernet_subsys(&sctp_net_ops); + if (ret < 0) + goto out_pernet; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); + if (ret < 0) + goto out_sctp4; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6); + if (ret < 0) + goto out_sctp6; + + return 0; +out_sctp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); +out_sctp4: + unregister_pernet_subsys(&sctp_net_ops); +out_pernet: + return ret; } static void __exit nf_conntrack_proto_sctp_fini(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); unregister_pernet_subsys(&sctp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4b66df2..ca969f6 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -336,30 +336,28 @@ static int udplite_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udplite4); + ret = 
nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n"); + pr_err("nf_conntrack_udplite4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udplite6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n"); + pr_err("nf_conntrack_udplite6: pernet registration failed.\n"); goto cleanup_udplite4; } return 0; cleanup_udplite4: - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); out: return ret; } static void udplite_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite6); - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); } static struct pernet_operations udplite_net_ops = { @@ -371,11 +369,33 @@ static struct pernet_operations udplite_net_ops = { static int __init nf_conntrack_proto_udplite_init(void) { - return register_pernet_subsys(&udplite_net_ops); + int ret; + + ret = register_pernet_subsys(&udplite_net_ops); + if (ret < 0) + goto out_pernet; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); + if (ret < 0) + goto out_udplite4; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6); + if (ret < 0) + goto out_udplite6; + + return 0; +out_udplite6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); +out_udplite4: + unregister_pernet_subsys(&udplite_net_ops); +out_pernet: + return ret; } static void __exit nf_conntrack_proto_udplite_exit(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); 
unregister_pernet_subsys(&udplite_net_ops); } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 295429f..4a2134f 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -138,6 +138,7 @@ static int help(struct sk_buff *skb, exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -151,8 +152,10 @@ static int help(struct sk_buff *skb, nf_ct_dump_tuple(&exp->tuple); /* Can't expect this? Best to drop packet now. */ - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index df8f4f2..e0c4373 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -855,11 +855,11 @@ static int refresh_signalling_expectation(struct nf_conn *ct, { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; int found = 0; spin_lock_bh(&nf_conntrack_lock); - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (exp->class != SIP_EXPECT_SIGNALLING || !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || exp->tuple.dst.protonum != proto || @@ -881,10 +881,10 @@ static void flush_expectations(struct nf_conn *ct, bool media) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; spin_lock_bh(&nf_conntrack_lock); - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) continue; if (!del_timer(&exp->timeout)) @@ -1095,8 +1095,10 @@ static int 
process_sdp(struct sk_buff *skb, unsigned int protoff, port = simple_strtoul(*dptr + mediaoff, NULL, 10); if (port == 0) continue; - if (port < 1024 || port > 65535) + if (port < 1024 || port > 65535) { + nf_ct_helper_log(skb, ct, "wrong port %u", port); return NF_DROP; + } /* The media description overrides the session description. */ maddr_len = 0; @@ -1107,15 +1109,20 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); } else if (caddr_len) memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); - else + else { + nf_ct_helper_log(skb, ct, "cannot parse SDP message"); return NF_DROP; + } ret = set_expected_rtp_rtcp(skb, protoff, dataoff, dptr, datalen, &rtp_addr, htons(port), t->class, mediaoff, medialen); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, ct, + "cannot add expectation for voice"); return ret; + } /* Update media connection address if present */ if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { @@ -1123,8 +1130,10 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, dptr, datalen, mediaoff, SDP_HDR_CONNECTION, SDP_HDR_MEDIA, &rtp_addr); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, ct, "cannot mangle SDP"); return ret; + } } i++; } @@ -1258,9 +1267,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, ret = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, SIP_HDR_CONTACT, NULL, &matchoff, &matchlen, &daddr, &port); - if (ret < 0) + if (ret < 0) { + nf_ct_helper_log(skb, ct, "cannot parse contact"); return NF_DROP; - else if (ret == 0) + } else if (ret == 0) return NF_ACCEPT; /* We don't support third-party registrations */ @@ -1273,8 +1283,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, if (ct_sip_parse_numerical_param(ct, *dptr, matchoff + matchlen, *datalen, - "expires=", NULL, NULL, &expires) < 0) + "expires=", NULL, NULL, &expires) < 0) { + 
nf_ct_helper_log(skb, ct, "cannot parse expires"); return NF_DROP; + } if (expires == 0) { ret = NF_ACCEPT; @@ -1282,8 +1294,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, } exp = nf_ct_expect_alloc(ct); - if (!exp) + if (!exp) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); return NF_DROP; + } saddr = NULL; if (sip_direct_signalling) @@ -1300,9 +1314,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, exp, matchoff, matchlen); else { - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; - else + } else ret = NF_ACCEPT; } nf_ct_expect_put(exp); @@ -1356,9 +1371,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int protoff, SIP_HDR_CONTACT, &in_contact, &matchoff, &matchlen, &addr, &port); - if (ret < 0) + if (ret < 0) { + nf_ct_helper_log(skb, ct, "cannot parse contact"); return NF_DROP; - else if (ret == 0) + } else if (ret == 0) break; /* We don't support third-party registrations */ @@ -1373,8 +1389,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int protoff, matchoff + matchlen, *datalen, "expires=", NULL, NULL, &c_expires); - if (ret < 0) + if (ret < 0) { + nf_ct_helper_log(skb, ct, "cannot parse expires"); return NF_DROP; + } if (c_expires == 0) break; if (refresh_signalling_expectation(ct, &addr, proto, port, @@ -1408,15 +1426,21 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, if (*datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; code = simple_strtoul(*dptr + strlen("SIP/2.0 "), NULL, 10); - if (!code) + if (!code) { + nf_ct_helper_log(skb, ct, "cannot get code"); return NF_DROP; + } if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, - &matchoff, &matchlen) <= 0) + &matchoff, &matchlen) <= 0) { + nf_ct_helper_log(skb, ct, "cannot parse 
cseq"); return NF_DROP; + } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) + if (!cseq) { + nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; + } matchend = matchoff + matchlen + 1; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { @@ -1440,8 +1464,25 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchoff, matchlen; unsigned int cseq, i; + union nf_inet_addr addr; + __be16 port; + + /* Many Cisco IP phones use a high source port for SIP requests, but + * listen for the response on port 5060. If we are the local + * router for one of these phones, save the port number from the + * Via: header so that nf_nat_sip can redirect the responses to + * the correct port. + */ + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, + SIP_HDR_VIA_UDP, NULL, &matchoff, + &matchlen, &addr, &port) > 0 && + port != ct->tuplehash[dir].tuple.src.u.udp.port && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3)) + ct_sip_info->forced_dport = port; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; @@ -1454,11 +1495,15 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, continue; if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, - &matchoff, &matchlen) <= 0) + &matchoff, &matchlen) <= 0) { + nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; + } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) + if (!cseq) { + nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; + } return handler->request(skb, protoff, dataoff, dptr, datalen, cseq); @@ -1481,8 +1526,10 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { nf_nat_sip = 
rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, - dptr, datalen)) + dptr, datalen)) { + nf_ct_helper_log(skb, ct, "cannot NAT SIP message"); ret = NF_DROP; + } } return ret; @@ -1547,10 +1594,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, msglen = origlen = end - dptr; if (msglen > datalen) - return NF_DROP; + return NF_ACCEPT; ret = process_sip_msg(skb, ct, protoff, dataoff, &dptr, &msglen); + /* process_sip_* functions report why this packet is dropped */ if (ret != NF_ACCEPT) break; diff = msglen - origlen; diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c index 6e545e2..87b95a2 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -16,6 +16,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_snmp.h> #define SNMP_PORT 161 diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 363285d..fedee39 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -366,7 +366,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops); + pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops); if (!pde) goto out_nf_conntrack; @@ -377,7 +377,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net) return 0; out_stat_nf_conntrack: - proc_net_remove(net, "nf_conntrack"); + remove_proc_entry("nf_conntrack", net->proc_net); out_nf_conntrack: return -ENOMEM; } @@ -385,7 +385,7 @@ out_nf_conntrack: static void nf_conntrack_standalone_fini_proc(struct net *net) { remove_proc_entry("nf_conntrack", net->proc_net_stat); - proc_net_remove(net, "nf_conntrack"); + remove_proc_entry("nf_conntrack", net->proc_net); } 
#else static int nf_conntrack_standalone_init_proc(struct net *net) @@ -472,13 +472,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) { struct ctl_table *table; - if (net_eq(net, &init_net)) { - nf_ct_netfilter_header = - register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); - if (!nf_ct_netfilter_header) - goto out; - } - table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), GFP_KERNEL); if (!table) @@ -502,10 +495,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) out_unregister_netfilter: kfree(table); out_kmemdup: - if (net_eq(net, &init_net)) - unregister_net_sysctl_table(nf_ct_netfilter_header); -out: - printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n"); return -ENOMEM; } @@ -513,8 +502,6 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) { struct ctl_table *table; - if (net_eq(net, &init_net)) - unregister_net_sysctl_table(nf_ct_netfilter_header); table = net->ct.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->ct.sysctl_header); kfree(table); @@ -530,51 +517,87 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ -static int nf_conntrack_net_init(struct net *net) +static int nf_conntrack_pernet_init(struct net *net) { int ret; - ret = nf_conntrack_init(net); + ret = nf_conntrack_init_net(net); if (ret < 0) goto out_init; + ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; + net->ct.sysctl_checksum = 1; net->ct.sysctl_log_invalid = 0; ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; + return 0; out_sysctl: nf_conntrack_standalone_fini_proc(net); out_proc: - nf_conntrack_cleanup(net); + nf_conntrack_cleanup_net(net); out_init: return ret; } -static void nf_conntrack_net_exit(struct net *net) +static void nf_conntrack_pernet_exit(struct net *net) { nf_conntrack_standalone_fini_sysctl(net); nf_conntrack_standalone_fini_proc(net); - nf_conntrack_cleanup(net); + 
nf_conntrack_cleanup_net(net); } static struct pernet_operations nf_conntrack_net_ops = { - .init = nf_conntrack_net_init, - .exit = nf_conntrack_net_exit, + .init = nf_conntrack_pernet_init, + .exit = nf_conntrack_pernet_exit, }; static int __init nf_conntrack_standalone_init(void) { - return register_pernet_subsys(&nf_conntrack_net_ops); + int ret = nf_conntrack_init_start(); + if (ret < 0) + goto out_start; + +#ifdef CONFIG_SYSCTL + nf_ct_netfilter_header = + register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); + if (!nf_ct_netfilter_header) { + pr_err("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; + goto out_sysctl; + } +#endif + + ret = register_pernet_subsys(&nf_conntrack_net_ops); + if (ret < 0) + goto out_pernet; + + nf_conntrack_init_end(); + return 0; + +out_pernet: +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(nf_ct_netfilter_header); +out_sysctl: +#endif + nf_conntrack_cleanup_end(); +out_start: + return ret; } static void __exit nf_conntrack_standalone_fini(void) { + nf_conntrack_cleanup_start(); unregister_pernet_subsys(&nf_conntrack_net_ops); +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(nf_ct_netfilter_header); +#endif + nf_conntrack_cleanup_end(); } module_init(nf_conntrack_standalone_init); diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 81fc61c..e9936c8 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -60,8 +60,10 @@ static int tftp_help(struct sk_buff *skb, nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); exp = nf_ct_expect_alloc(ct); - if (exp == NULL) + if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); return NF_DROP; + } tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), @@ -74,8 +76,10 @@ static int tftp_help(struct sk_buff *skb, nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) ret = 
nf_nat_tftp(skb, ctinfo, exp); - else if (nf_ct_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); break; case TFTP_OPCODE_DATA: diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index a878ce5..93da609 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -37,24 +37,15 @@ static struct nf_ct_ext_type timeout_extend __read_mostly = { .id = NF_CT_EXT_TIMEOUT, }; -int nf_conntrack_timeout_init(struct net *net) +int nf_conntrack_timeout_init(void) { - int ret = 0; - - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&timeout_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_timeout: Unable to register " - "timeout extension.\n"); - return ret; - } - } - - return 0; + int ret = nf_ct_extend_register(&timeout_extend); + if (ret < 0) + pr_err("nf_ct_timeout: Unable to register timeout extension.\n"); + return ret; } -void nf_conntrack_timeout_fini(struct net *net) +void nf_conntrack_timeout_fini(void) { - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&timeout_extend); + nf_ct_extend_unregister(&timeout_extend); } diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 7ea8026..902fb0a 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -88,37 +88,28 @@ static void nf_conntrack_tstamp_fini_sysctl(struct net *net) } #endif -int nf_conntrack_tstamp_init(struct net *net) +int nf_conntrack_tstamp_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_tstamp = nf_ct_tstamp; + return nf_conntrack_tstamp_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&tstamp_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_tstamp: Unable to register " - "extension\n"); - goto out_extend_register; - } - } +void nf_conntrack_tstamp_pernet_fini(struct net 
*net) +{ + nf_conntrack_tstamp_fini_sysctl(net); + nf_ct_extend_unregister(&tstamp_extend); +} - ret = nf_conntrack_tstamp_init_sysctl(net); +int nf_conntrack_tstamp_init(void) +{ + int ret; + ret = nf_ct_extend_register(&tstamp_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&tstamp_extend); -out_extend_register: + pr_err("nf_ct_tstamp: Unable to register extension\n"); return ret; } -void nf_conntrack_tstamp_fini(struct net *net) +void nf_conntrack_tstamp_fini(void) { - nf_conntrack_tstamp_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&tstamp_extend); + nf_ct_extend_unregister(&tstamp_extend); } diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index 42d3378..3b67c9d 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -56,15 +56,19 @@ static unsigned int help(struct sk_buff *skb, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; + } sprintf(buffer, "%u", port); ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, protoff, matchoff, matchlen, buffer, strlen(buffer)); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); nf_ct_unexpect_related(exp); + } return ret; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5f2f910..ad24be0 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -191,9 +191,8 @@ find_appropriate_src(struct net *net, u16 zone, unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; - const struct hlist_node *n; - hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. 
*/ @@ -468,33 +467,22 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); struct nf_nat_proto_clean { u8 l3proto; u8 l4proto; - bool hash; }; -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int nf_nat_proto_clean(struct nf_conn *i, void *data) +/* kill conntracks with affected NAT section */ +static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; struct nf_conn_nat *nat = nfct_nat(i); if (!nat) return 0; - if (!(i->status & IPS_SRC_NAT_DONE)) - return 0; + if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; - if (clean->hash) { - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); - } else { - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | - IPS_SEQ_ADJUST); - } - return 0; + return i->status & IPS_NAT_MASK ? 1 : 0; } static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) @@ -506,16 +494,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - - /* Step 2 - clean NAT section */ - clean.hash = false; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -527,16 +507,9 @@ static void nf_nat_l3proto_clean(u8 l3proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - /* Step 2 - clean NAT section */ - clean.hash = false; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -774,7 +747,7 @@ static void 
__net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index e839b97..e84a578 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -96,8 +96,10 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, ct, "all ports in use"); return NF_DROP; + } buflen = nf_nat_ftp_fmt_cmd(ct, type, buffer, sizeof(buffer), &newaddr, port); @@ -113,6 +115,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, return NF_ACCEPT; out: + nf_ct_helper_log(skb, ct, "cannot mangle packet"); nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index 1fedee6..f02b360 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -56,14 +56,18 @@ static unsigned int help(struct sk_buff *skb, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; + } ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, protoff, matchoff, matchlen, buffer, strlen(buffer)); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); nf_ct_unexpect_related(exp); + } return ret; } diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index 16303c7..96ccdf7 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -95,6 +95,7 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); char buffer[INET6_ADDRSTRLEN + 
sizeof("[]:nnnnn")]; unsigned int buflen; union nf_inet_addr newaddr; @@ -107,7 +108,8 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, } else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) && ct->tuplehash[dir].tuple.dst.u.udp.port == port) { newaddr = ct->tuplehash[!dir].tuple.src.u3; - newport = ct->tuplehash[!dir].tuple.src.u.udp.port; + newport = ct_sip_info->forced_dport ? : + ct->tuplehash[!dir].tuple.src.u.udp.port; } else return 1; @@ -144,6 +146,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); unsigned int coff, matchoff, matchlen; enum sip_header_types hdr; union nf_inet_addr addr; @@ -156,8 +159,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen, &addr, &port) > 0 && !map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, &addr, port)) + matchoff, matchlen, &addr, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle SIP message"); return NF_DROP; + } request = 1; } else request = 0; @@ -190,8 +195,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, olen = *datalen; if (!map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, &addr, port)) + matchoff, matchlen, &addr, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle Via header"); return NF_DROP; + } matchend = matchoff + matchlen + *datalen - olen; @@ -206,8 +213,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, &ct->tuplehash[!dir].tuple.dst.u3, true); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) + poff, plen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle maddr"); return NF_DROP; + } } /* The received= parameter (RFC 2361) contains the address @@ -222,6 +231,7 @@ 
static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, false); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, poff, plen, buffer, buflen)) + nf_ct_helper_log(skb, ct, "cannot mangle received"); return NF_DROP; } @@ -235,8 +245,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; buflen = sprintf(buffer, "%u", ntohs(p)); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) + poff, plen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle rport"); return NF_DROP; + } } } @@ -250,13 +262,36 @@ next: &addr, &port) > 0) { if (!map_addr(skb, protoff, dataoff, dptr, datalen, matchoff, matchlen, - &addr, port)) + &addr, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle contact"); return NF_DROP; + } } if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) || - !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) + !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) { + nf_ct_helper_log(skb, ct, "cannot mangle SIP from/to"); return NF_DROP; + } + + /* Mangle destination port for Cisco phones, then fix up checksums */ + if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) { + struct udphdr *uh; + + if (!skb_make_writable(skb, skb->len)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); + return NF_DROP; + } + + uh = (void *)skb->data + protoff; + uh->dest = ct_sip_info->forced_dport; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff, + 0, 0, NULL, 0)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); + return NF_DROP; + } + } return NF_ACCEPT; } @@ -311,8 +346,10 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); union nf_inet_addr newaddr; 
u_int16_t port; + __be16 srcport; char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; @@ -326,8 +363,9 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, /* If the signalling port matches the connection's source port in the * original direction, try to use the destination port in the opposite * direction. */ - if (exp->tuple.dst.u.udp.port == - ct->tuplehash[dir].tuple.src.u.udp.port) + srcport = ct_sip_info->forced_dport ? : + ct->tuplehash[dir].tuple.src.u.udp.port; + if (exp->tuple.dst.u.udp.port == srcport) port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); else port = ntohs(exp->tuple.dst.u.udp.port); @@ -351,15 +389,19 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, ct, "all ports in use for SIP"); return NF_DROP; + } if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, buffer, buflen)) + matchoff, matchlen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); goto err; + } } return NF_ACCEPT; @@ -552,14 +594,18 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, ct, "all ports in use for SDP media"); goto err1; + } /* Update media port. 
*/ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, - mediaoff, medialen, port)) + mediaoff, medialen, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle SDP message"); goto err2; + } return NF_ACCEPT; diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c index ccabbda..7f67e1d 100644 --- a/net/netfilter/nf_nat_tftp.c +++ b/net/netfilter/nf_nat_tftp.c @@ -28,8 +28,10 @@ static unsigned int help(struct sk_buff *skb, = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, exp->master, "cannot add expectation"); return NF_DROP; + } return NF_ACCEPT; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 58a09b7..0b1b32c 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -36,8 +36,10 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static const struct nfnetlink_subsystem __rcu *subsys_table[NFNL_SUBSYS_COUNT]; -static DEFINE_MUTEX(nfnl_mutex); +static struct { + struct mutex mutex; + const struct nfnetlink_subsystem __rcu *subsys; +} table[NFNL_SUBSYS_COUNT]; static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_NEW] = NFNL_SUBSYS_CTNETLINK, @@ -48,27 +50,27 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, }; -void nfnl_lock(void) +void nfnl_lock(__u8 subsys_id) { - mutex_lock(&nfnl_mutex); + mutex_lock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_lock); -void nfnl_unlock(void) +void nfnl_unlock(__u8 subsys_id) { - mutex_unlock(&nfnl_mutex); + mutex_unlock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_unlock); int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { - nfnl_lock(); - if 
(subsys_table[n->subsys_id]) { - nfnl_unlock(); + nfnl_lock(n->subsys_id); + if (table[n->subsys_id].subsys) { + nfnl_unlock(n->subsys_id); return -EBUSY; } - rcu_assign_pointer(subsys_table[n->subsys_id], n); - nfnl_unlock(); + rcu_assign_pointer(table[n->subsys_id].subsys, n); + nfnl_unlock(n->subsys_id); return 0; } @@ -76,9 +78,9 @@ EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) { - nfnl_lock(); - subsys_table[n->subsys_id] = NULL; - nfnl_unlock(); + nfnl_lock(n->subsys_id); + table[n->subsys_id].subsys = NULL; + nfnl_unlock(n->subsys_id); synchronize_rcu(); return 0; } @@ -91,7 +93,7 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t if (subsys_id >= NFNL_SUBSYS_COUNT) return NULL; - return rcu_dereference(subsys_table[subsys_id]); + return rcu_dereference(table[subsys_id].subsys); } static inline const struct nfnl_callback * @@ -175,6 +177,7 @@ replay: struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; + __u8 subsys_id = NFNL_SUBSYS_ID(type); err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy); @@ -189,10 +192,9 @@ replay: rcu_read_unlock(); } else { rcu_read_unlock(); - nfnl_lock(); - if (rcu_dereference_protected( - subsys_table[NFNL_SUBSYS_ID(type)], - lockdep_is_held(&nfnl_mutex)) != ss || + nfnl_lock(subsys_id); + if (rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) @@ -200,7 +202,7 @@ replay: (const struct nlattr **)cda); else err = -EINVAL; - nfnl_unlock(); + nfnl_unlock(subsys_id); } if (err == -EAGAIN) goto replay; @@ -267,6 +269,11 @@ static struct pernet_operations nfnetlink_net_ops = { static int __init nfnetlink_init(void) { + int i; + + for (i=0; i<NFNL_SUBSYS_COUNT; i++) + mutex_init(&table[i].mutex); + 
pr_info("Netfilter messages via NETLINK v%s.\n", nfversion); return register_pernet_subsys(&nfnetlink_net_ops); } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 589d686..dc3fd5d 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -49,6 +49,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); + if (strlen(acct_name) == 0) + return -EINVAL; list_for_each_entry(nfacct, &nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 945950a..a191b6d 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -282,7 +282,6 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - struct hlist_node *n; int ret = 0, i; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) @@ -296,7 +295,7 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, rcu_read_lock(); for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -452,13 +451,12 @@ static int nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conntrack_helper *cur, *last; - struct hlist_node *n; rcu_read_lock(); last = (struct nf_conntrack_helper *)cb->args[1]; for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) { restart: - hlist_for_each_entry_rcu(cur, n, + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[cb->args[0]], hnode) { /* skip non-userspace conntrack helpers. 
*/ @@ -495,7 +493,6 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, { int ret = -ENOENT, i; struct nf_conntrack_helper *cur; - struct hlist_node *n; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; @@ -520,7 +517,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -568,7 +565,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; int i, j = 0, ret; @@ -585,7 +582,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -654,13 +651,13 @@ err_out: static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; int i; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); for (i=0; i<nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. 
*/ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9f199f2..f248db5 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -13,6 +13,7 @@ */ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/if_arp.h> #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> @@ -86,11 +87,10 @@ static struct nfulnl_instance * __instance_lookup(u_int16_t group_num) { struct hlist_head *head; - struct hlist_node *pos; struct nfulnl_instance *inst; head = &instance_table[instance_hashfn(group_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->group_num == group_num) return inst; } @@ -384,6 +384,7 @@ __build_packet_message(struct nfulnl_instance *inst, struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; + const unsigned char *hwhdrp; nlh = nlmsg_put(inst->skb, 0, 0, NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, @@ -485,9 +486,17 @@ __build_packet_message(struct nfulnl_instance *inst, if (indev && skb_mac_header_was_set(skb)) { if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || nla_put_be16(inst->skb, NFULA_HWLEN, - htons(skb->dev->hard_header_len)) || - nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, - skb_mac_header(skb))) + htons(skb->dev->hard_header_len))) + goto nla_put_failure; + + hwhdrp = skb_mac_header(skb); + + if (skb->dev->type == ARPHRD_SIT) + hwhdrp -= ETH_HLEN; + + if (hwhdrp >= skb->head && + nla_put(inst->skb, NFULA_HWHEADER, + skb->dev->hard_header_len, hwhdrp)) goto nla_put_failure; } @@ -707,11 +716,11 @@ nfulnl_rcv_nl_event(struct notifier_block *this, /* destroy all instances for this portid */ spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; + struct hlist_node *t2; struct nfulnl_instance *inst; struct hlist_head *head = &instance_table[i]; - 
hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + hlist_for_each_entry_safe(inst, t2, head, hlist) { if ((net_eq(n->net, &init_net)) && (n->portid == inst->peer_portid)) __instance_destroy(inst); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 3158d87..42680b2 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -80,11 +80,10 @@ static struct nfqnl_instance * instance_lookup(u_int16_t queue_num) { struct hlist_head *head; - struct hlist_node *pos; struct nfqnl_instance *inst; head = &instance_table[instance_hashfn(queue_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->queue_num == queue_num) return inst; } @@ -113,7 +112,7 @@ instance_create(u_int16_t queue_num, int portid) inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xfffff; + inst->copy_range = 0xffff; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); @@ -583,11 +582,10 @@ nfqnl_dev_drop(int ifindex) rcu_read_lock(); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry_rcu(inst, tmp, head, hlist) + hlist_for_each_entry_rcu(inst, head, hlist) nfqnl_flush(inst, dev_cmp, ifindex); } @@ -627,11 +625,11 @@ nfqnl_rcv_nl_event(struct notifier_block *this, /* destroy all instances for this portid */ spin_lock(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; + struct hlist_node *t2; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + hlist_for_each_entry_safe(inst, t2, head, hlist) { if ((n->net == &init_net) && (n->portid == inst->peer_portid)) __instance_destroy(inst); @@ -1064,8 +1062,10 @@ static int 
__init nfnetlink_queue_init(void) #ifdef CONFIG_PROC_FS if (!proc_create("nfnetlink_queue", 0440, - proc_net_netfilter, &nfqnl_file_ops)) + proc_net_netfilter, &nfqnl_file_ops)) { + status = -ENOMEM; goto cleanup_subsys; + } #endif register_netdevice_notifier(&nfqnl_dev_notifier); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8d987c3..686c771 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -345,19 +345,27 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target, } EXPORT_SYMBOL_GPL(xt_find_revision); -static char *textify_hooks(char *buf, size_t size, unsigned int mask) +static char * +textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto) { - static const char *const names[] = { + static const char *const inetbr_names[] = { "PREROUTING", "INPUT", "FORWARD", "OUTPUT", "POSTROUTING", "BROUTING", }; - unsigned int i; + static const char *const arp_names[] = { + "INPUT", "FORWARD", "OUTPUT", + }; + const char *const *names; + unsigned int i, max; char *p = buf; bool np = false; int res; + names = (nfproto == NFPROTO_ARP) ? arp_names : inetbr_names; + max = (nfproto == NFPROTO_ARP) ? ARRAY_SIZE(arp_names) : + ARRAY_SIZE(inetbr_names); *p = '\0'; - for (i = 0; i < ARRAY_SIZE(names); ++i) { + for (i = 0; i < max; ++i) { if (!(mask & (1 << i))) continue; res = snprintf(p, size, "%s%s", np ? 
"/" : "", names[i]); @@ -402,8 +410,10 @@ int xt_check_match(struct xt_mtchk_param *par, pr_err("%s_tables: %s match: used from hooks %s, but only " "valid from %s\n", xt_prefix[par->family], par->match->name, - textify_hooks(used, sizeof(used), par->hook_mask), - textify_hooks(allow, sizeof(allow), par->match->hooks)); + textify_hooks(used, sizeof(used), par->hook_mask, + par->family), + textify_hooks(allow, sizeof(allow), par->match->hooks, + par->family)); return -EINVAL; } if (par->match->proto && (par->match->proto != proto || inv_proto)) { @@ -575,8 +585,10 @@ int xt_check_target(struct xt_tgchk_param *par, pr_err("%s_tables: %s target: used from hooks %s, but only " "usable from %s\n", xt_prefix[par->family], par->target->name, - textify_hooks(used, sizeof(used), par->hook_mask), - textify_hooks(allow, sizeof(allow), par->target->hooks)); + textify_hooks(used, sizeof(used), par->hook_mask, + par->family), + textify_hooks(allow, sizeof(allow), par->target->hooks, + par->family)); return -EINVAL; } if (par->target->proto && (par->target->proto != proto || inv_proto)) { @@ -1311,12 +1323,12 @@ int xt_proto_init(struct net *net, u_int8_t af) out_remove_matches: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); out_remove_tables: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); out: return -1; #endif @@ -1330,15 +1342,15 @@ void xt_proto_fini(struct net *net, u_int8_t af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, 
sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index ba92824..3228d7f 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -124,6 +124,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_audit_info *info = par->targinfo; struct audit_buffer *ab; + if (audit_enabled == 0) + goto errout; + ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); if (ab == NULL) goto errout; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index ae7f5da..a60261c 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -20,12 +20,8 @@ #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> -static unsigned int xt_ct_target_v0(struct sk_buff *skb, - const struct xt_action_param *par) +static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) { - const struct xt_ct_target_info *info = par->targinfo; - struct nf_conn *ct = info->ct; - /* Previously seen (loopback)? Ignore. */ if (skb->nfct != NULL) return XT_CONTINUE; @@ -37,21 +33,22 @@ static unsigned int xt_ct_target_v0(struct sk_buff *skb, return XT_CONTINUE; } -static unsigned int xt_ct_target_v1(struct sk_buff *skb, +static unsigned int xt_ct_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_ct_target_info_v1 *info = par->targinfo; + const struct xt_ct_target_info *info = par->targinfo; struct nf_conn *ct = info->ct; - /* Previously seen (loopback)? Ignore. 
*/ - if (skb->nfct != NULL) - return XT_CONTINUE; + return xt_ct_target(skb, ct); +} - atomic_inc(&ct->ct_general.use); - skb->nfct = &ct->ct_general; - skb->nfctinfo = IP_CT_NEW; +static unsigned int xt_ct_target_v1(struct sk_buff *skb, + const struct xt_action_param *par) +{ + const struct xt_ct_target_info_v1 *info = par->targinfo; + struct nf_conn *ct = info->ct; - return XT_CONTINUE; + return xt_ct_target(skb, ct); } static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) @@ -104,63 +101,6 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, return 0; } -static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) -{ - struct xt_ct_target_info *info = par->targinfo; - struct nf_conntrack_tuple t; - struct nf_conn *ct; - int ret; - - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; - - if (info->flags & XT_CT_NOTRACK) { - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); - goto out; - } - -#ifndef CONFIG_NF_CONNTRACK_ZONES - if (info->zone) - goto err1; -#endif - - ret = nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) - goto err1; - - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); - ret = PTR_ERR(ct); - if (IS_ERR(ct)) - goto err2; - - ret = 0; - if ((info->ct_events || info->exp_events) && - !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, - GFP_KERNEL)) - goto err3; - - if (info->helper[0]) { - ret = xt_ct_set_helper(ct, info->helper, par); - if (ret < 0) - goto err3; - } - - __set_bit(IPS_TEMPLATE_BIT, &ct->status); - __set_bit(IPS_CONFIRMED_BIT, &ct->status); -out: - info->ct = ct; - return 0; - -err3: - nf_conntrack_free(ct); -err2: - nf_ct_l3proto_module_put(par->family); -err1: - return ret; -} - #ifdef CONFIG_NF_CONNTRACK_TIMEOUT static void __xt_ct_tg_timeout_put(struct ctnl_timeout *timeout) { @@ -238,15 +178,12 @@ out: #endif } -static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) +static int xt_ct_tg_check(const struct xt_tgchk_param 
*par, + struct xt_ct_target_info_v1 *info) { - struct xt_ct_target_info_v1 *info = par->targinfo; struct nf_conntrack_tuple t; struct nf_conn *ct; - int ret; - - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; + int ret = -EOPNOTSUPP; if (info->flags & XT_CT_NOTRACK) { ct = nf_ct_untracked_get(); @@ -289,6 +226,10 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) __set_bit(IPS_TEMPLATE_BIT, &ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Overload tuple linked list to put us in template list. */ + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &par->net->ct.tmpl); out: info->ct = ct; return 0; @@ -301,20 +242,49 @@ err1: return ret; } -static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) +static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) { struct xt_ct_target_info *info = par->targinfo; - struct nf_conn *ct = info->ct; - struct nf_conn_help *help; + struct xt_ct_target_info_v1 info_v1 = { + .flags = info->flags, + .zone = info->zone, + .ct_events = info->ct_events, + .exp_events = info->exp_events, + }; + int ret; - if (!nf_ct_is_untracked(ct)) { - help = nfct_help(ct); - if (help) - module_put(help->helper->me); + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; - nf_ct_l3proto_module_put(par->family); - } - nf_ct_put(info->ct); + memcpy(info_v1.helper, info->helper, sizeof(info->helper)); + + ret = xt_ct_tg_check(par, &info_v1); + if (ret < 0) + return ret; + + info->ct = info_v1.ct; + + return ret; +} + +static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; + + return xt_ct_tg_check(par, par->targinfo); +} + +static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_MASK) + return -EINVAL; + + return xt_ct_tg_check(par, par->targinfo); } static void 
xt_ct_destroy_timeout(struct nf_conn *ct) @@ -335,9 +305,9 @@ static void xt_ct_destroy_timeout(struct nf_conn *ct) #endif } -static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, + struct xt_ct_target_info_v1 *info) { - struct xt_ct_target_info_v1 *info = par->targinfo; struct nf_conn *ct = info->ct; struct nf_conn_help *help; @@ -353,6 +323,26 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) nf_ct_put(info->ct); } +static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct xt_ct_target_info_v1 info_v1 = { + .flags = info->flags, + .zone = info->zone, + .ct_events = info->ct_events, + .exp_events = info->exp_events, + .ct = info->ct, + }; + memcpy(info_v1.helper, info->helper, sizeof(info->helper)); + + xt_ct_tg_destroy(par, &info_v1); +} + +static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +{ + xt_ct_tg_destroy(par, par->targinfo); +} + static struct xt_target xt_ct_tg_reg[] __read_mostly = { { .name = "CT", @@ -375,16 +365,73 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .table = "raw", .me = THIS_MODULE, }, + { + .name = "CT", + .family = NFPROTO_UNSPEC, + .revision = 2, + .targetsize = sizeof(struct xt_ct_target_info_v1), + .checkentry = xt_ct_tg_check_v2, + .destroy = xt_ct_tg_destroy_v1, + .target = xt_ct_target_v1, + .table = "raw", + .me = THIS_MODULE, + }, +}; + +static unsigned int +notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + /* Previously seen (loopback)? Ignore. 
*/ + if (skb->nfct != NULL) + return XT_CONTINUE; + + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + + return XT_CONTINUE; +} + +static int notrack_chk(const struct xt_tgchk_param *par) +{ + if (!par->net->xt.notrack_deprecated_warning) { + pr_info("netfilter: NOTRACK target is deprecated, " + "use CT instead or upgrade iptables\n"); + par->net->xt.notrack_deprecated_warning = true; + } + return 0; +} + +static struct xt_target notrack_tg_reg __read_mostly = { + .name = "NOTRACK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = notrack_chk, + .target = notrack_tg, + .table = "raw", + .me = THIS_MODULE, }; static int __init xt_ct_tg_init(void) { - return xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); + int ret; + + ret = xt_register_target(&notrack_tg_reg); + if (ret < 0) + return ret; + + ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); + if (ret < 0) { + xt_unregister_target(&notrack_tg_reg); + return ret; + } + return 0; } static void __exit xt_ct_tg_exit(void) { xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); + xt_unregister_target(&notrack_tg_reg); } module_init(xt_ct_tg_init); @@ -394,3 +441,5 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: connection tracking target"); MODULE_ALIAS("ipt_CT"); MODULE_ALIAS("ip6t_CT"); +MODULE_ALIAS("ipt_NOTRACK"); +MODULE_ALIAS("ip6t_NOTRACK"); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index f264032..370adf6 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -43,12 +43,11 @@ static void xt_rateest_hash_insert(struct xt_rateest *est) struct xt_rateest *xt_rateest_lookup(const char *name) { struct xt_rateest *est; - struct hlist_node *n; unsigned int h; h = xt_rateest_hash(name); mutex_lock(&xt_rateest_mutex); - hlist_for_each_entry(est, n, &rateest_hash[h], list) { + hlist_for_each_entry(est, &rateest_hash[h], list) { if (strcmp(est->name, name) == 0) {
est->refcnt++; mutex_unlock(&xt_rateest_mutex); diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c new file mode 100644 index 0000000..12d4da8 --- /dev/null +++ b/net/netfilter/xt_bpf.c @@ -0,0 +1,73 @@ +/* Xtables module to match packets using a BPF filter. + * Copyright 2013 Google Inc. + * Written by Willem de Bruijn <willemb@google.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/filter.h> + +#include <linux/netfilter/xt_bpf.h> +#include <linux/netfilter/x_tables.h> + +MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>"); +MODULE_DESCRIPTION("Xtables: BPF filter match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_bpf"); +MODULE_ALIAS("ip6t_bpf"); + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + struct sock_fprog program; + + program.len = info->bpf_program_num_elem; + program.filter = (struct sock_filter __user *) info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { + pr_info("bpf: check failed: parse error\n"); + return -EINVAL; + } + + return 0; +} + +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + + return SK_RUN_FILTER(info->filter, skb); +} + +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + sk_unattached_filter_destroy(info->filter); +} + +static struct xt_match bpf_mt_reg __read_mostly = { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, +}; + +static int __init bpf_mt_init(void) +{ + return xt_register_match(&bpf_mt_reg); +} + 
+static void __exit bpf_mt_exit(void) +{ + xt_unregister_match(&bpf_mt_reg); +} + +module_init(bpf_mt_init); +module_exit(bpf_mt_exit); diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c new file mode 100644 index 0000000..9f8719d --- /dev/null +++ b/net/netfilter/xt_connlabel.c @@ -0,0 +1,99 @@ +/* + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); +MODULE_ALIAS("ipt_connlabel"); +MODULE_ALIAS("ip6t_connlabel"); + +static bool +connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_connlabel_mtinfo *info = par->matchinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + bool invert = info->options & XT_CONNLABEL_OP_INVERT; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL || nf_ct_is_untracked(ct)) + return invert; + + if (info->options & XT_CONNLABEL_OP_SET) + return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; + + return nf_connlabel_match(ct, info->bit) ^ invert; +} + +static int connlabel_mt_check(const struct xt_mtchk_param *par) +{ + const int options = XT_CONNLABEL_OP_INVERT | + XT_CONNLABEL_OP_SET; + struct xt_connlabel_mtinfo *info = par->matchinfo; + int ret; + size_t words; + + if (info->bit > XT_CONNLABEL_MAXBIT) + return -ERANGE; + + if (info->options & ~options) { + pr_err("Unknown options in mask %x\n", info->options); + return -EINVAL; + } + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { + pr_info("cannot load conntrack support for proto=%u\n", + 
par->family); + return ret; + } + + par->net->ct.labels_used++; + words = BITS_TO_LONGS(info->bit+1); + if (words > par->net->ct.label_words) + par->net->ct.label_words = words; + + return ret; +} + +static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) +{ + par->net->ct.labels_used--; + if (par->net->ct.labels_used == 0) + par->net->ct.label_words = 0; + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_match connlabels_mt_reg __read_mostly = { + .name = "connlabel", + .family = NFPROTO_UNSPEC, + .checkentry = connlabel_mt_check, + .match = connlabel_mt, + .matchsize = sizeof(struct xt_connlabel_mtinfo), + .destroy = connlabel_mt_destroy, + .me = THIS_MODULE, +}; + +static int __init connlabel_mt_init(void) +{ + return xt_register_match(&connlabels_mt_reg); +} + +static void __exit connlabel_mt_exit(void) +{ + xt_unregister_match(&connlabels_mt_reg); +} + +module_init(connlabel_mt_init); +module_exit(connlabel_mt_exit); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 70b5591..c40b269 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -101,7 +101,7 @@ static int count_them(struct net *net, { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; - struct hlist_node *pos, *n; + struct hlist_node *n; struct nf_conn *found_ct; struct hlist_head *hash; bool addit = true; @@ -115,7 +115,7 @@ static int count_them(struct net *net, rcu_read_lock(); /* check the saved connections */ - hlist_for_each_entry_safe(conn, pos, n, hash, node) { + hlist_for_each_entry_safe(conn, n, hash, node) { found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, &conn->tuple); found_ct = NULL; @@ -258,14 +258,14 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; - struct hlist_node *pos, *n; + struct hlist_node *n; struct hlist_head *hash = info->data->iphash; unsigned int i; 
nf_ct_l3proto_module_put(par->family); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { - hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) { + hlist_for_each_entry_safe(conn, n, &hash[i], node) { hlist_del(&conn->node); kfree(conn); } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 26a668a..f330e8b 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -141,11 +141,10 @@ dsthash_find(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) { struct dsthash_ent *ent; - struct hlist_node *pos; u_int32_t hash = hash_dst(ht, dst); if (!hlist_empty(&ht->hash[hash])) { - hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node) + hlist_for_each_entry_rcu(ent, &ht->hash[hash], node) if (dst_cmp(ent, dst)) { spin_lock(&ent->lock); return ent; @@ -157,11 +156,22 @@ dsthash_find(const struct xt_hashlimit_htable *ht, /* allocate dsthash_ent, initialize dst, put in htable and lock it */ static struct dsthash_ent * dsthash_alloc_init(struct xt_hashlimit_htable *ht, - const struct dsthash_dst *dst) + const struct dsthash_dst *dst, bool *race) { struct dsthash_ent *ent; spin_lock(&ht->lock); + + /* Two or more packets may race to create the same entry in the + * hashtable, double check if this packet lost race. 
+ */ + ent = dsthash_find(ht, dst); + if (ent != NULL) { + spin_unlock(&ht->lock); + *race = true; + return ent; + } + /* initialize hash with random val at the time we allocate * the first hashtable entry */ if (unlikely(!ht->rnd_initialized)) { @@ -286,8 +296,8 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, spin_lock_bh(&ht->lock); for (i = 0; i < ht->cfg.size; i++) { struct dsthash_ent *dh; - struct hlist_node *pos, *n; - hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) { + struct hlist_node *n; + hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) { if ((*select)(ht, dh)) dsthash_free(ht, dh); } @@ -318,7 +328,10 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo) parent = hashlimit_net->ipt_hashlimit; else parent = hashlimit_net->ip6t_hashlimit; - remove_proc_entry(hinfo->pde->name, parent); + + if(parent != NULL) + remove_proc_entry(hinfo->pde->name, parent); + htable_selective_cleanup(hinfo, select_all); vfree(hinfo); } @@ -329,9 +342,8 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - struct hlist_node *pos; - hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) { + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { if (!strcmp(name, hinfo->pde->name) && hinfo->family == family) { hinfo->use++; @@ -585,6 +597,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; + bool race = false; u32 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) @@ -593,13 +606,18 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) rcu_read_lock_bh(); dh = dsthash_find(hinfo, &dst); if (dh == NULL) { - dh = dsthash_alloc_init(hinfo, &dst); + dh = dsthash_alloc_init(hinfo, &dst, &race); if (dh == NULL) { rcu_read_unlock_bh(); goto hotdrop; + } else if (race) { + /* 
Already got an entry, update expiration timeout */ + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_recalc(dh, now, hinfo->cfg.mode); + } else { + dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_init(dh, hinfo); } - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_init(dh, hinfo); } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); @@ -801,10 +819,9 @@ static int dl_seq_show(struct seq_file *s, void *v) struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; - struct hlist_node *pos; if (!hlist_empty(&htable->hash[*bucket])) { - hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) + hlist_for_each_entry(ent, &htable->hash[*bucket], node) if (dl_seq_real_show(ent, htable->family, s)) return -1; } @@ -847,7 +864,7 @@ static int __net_init hashlimit_proc_net_init(struct net *net) #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net); if (!hashlimit_net->ip6t_hashlimit) { - proc_net_remove(net, "ipt_hashlimit"); + remove_proc_entry("ipt_hashlimit", net->proc_net); return -ENOMEM; } #endif @@ -856,9 +873,29 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { - proc_net_remove(net, "ipt_hashlimit"); + struct xt_hashlimit_htable *hinfo; + struct proc_dir_entry *pde; + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + /* recent_net_exit() is called before recent_mt_destroy(). Make sure + * that the parent xt_recent proc entry is empty before trying to + * remove it.
+ */ + mutex_lock(&hashlimit_mutex); + pde = hashlimit_net->ipt_hashlimit; + if (pde == NULL) + pde = hashlimit_net->ip6t_hashlimit; + + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) + remove_proc_entry(hinfo->pde->name, pde); + + hashlimit_net->ipt_hashlimit = NULL; + hashlimit_net->ip6t_hashlimit = NULL; + mutex_unlock(&hashlimit_mutex); + + remove_proc_entry("ipt_hashlimit", net->proc_net); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - proc_net_remove(net, "ip6t_hashlimit"); + remove_proc_entry("ip6t_hashlimit", net->proc_net); #endif } @@ -872,9 +909,6 @@ static int __net_init hashlimit_net_init(struct net *net) static void __net_exit hashlimit_net_exit(struct net *net) { - struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); - - BUG_ON(!hlist_empty(&hashlimit_net->htables)); hashlimit_proc_net_exit(net); } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 4635c9b..d9cad31 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -29,6 +29,7 @@ #include <linux/skbuff.h> #include <linux/inet.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -310,6 +311,14 @@ out: return ret; } +static void recent_table_free(void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + static int recent_mt_check(const struct xt_mtchk_param *par, const struct xt_recent_mtinfo_v1 *info) { @@ -322,6 +331,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, #endif unsigned int i; int ret = -EINVAL; + size_t sz; if (unlikely(!hash_rnd_inited)) { get_random_bytes(&hash_rnd, sizeof(hash_rnd)); @@ -360,8 +370,11 @@ static int recent_mt_check(const struct xt_mtchk_param *par, goto out; } - t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size, - GFP_KERNEL); + sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size; + if (sz <= PAGE_SIZE) + t = kzalloc(sz, GFP_KERNEL); + else + t = vzalloc(sz); if (t == NULL) { ret 
= -ENOMEM; goto out; @@ -377,14 +390,14 @@ static int recent_mt_check(const struct xt_mtchk_param *par, uid = make_kuid(&init_user_ns, ip_list_uid); gid = make_kgid(&init_user_ns, ip_list_gid); if (!uid_valid(uid) || !gid_valid(gid)) { - kfree(t); + recent_table_free(t); ret = -EINVAL; goto out; } pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_fops, t); if (pde == NULL) { - kfree(t); + recent_table_free(t); ret = -ENOMEM; goto out; } @@ -431,10 +444,11 @@ static void recent_mt_destroy(const struct xt_mtdtor_param *par) list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS - remove_proc_entry(t->name, recent_net->xt_recent); + if (recent_net->xt_recent != NULL) + remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); - kfree(t); + recent_table_free(t); } mutex_unlock(&recent_mutex); } @@ -526,7 +540,7 @@ static ssize_t recent_mt_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) { - const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + const struct proc_dir_entry *pde = PDE(file_inode(file)); struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; @@ -615,7 +629,21 @@ static int __net_init recent_proc_net_init(struct net *net) static void __net_exit recent_proc_net_exit(struct net *net) { - proc_net_remove(net, "xt_recent"); + struct recent_net *recent_net = recent_pernet(net); + struct recent_table *t; + + /* recent_net_exit() is called before recent_mt_destroy(). Make sure + * that the parent xt_recent proc entry is empty before trying to + * remove it.
+ */ + spin_lock_bh(&recent_lock); + list_for_each_entry(t, &recent_net->tables, list) + remove_proc_entry(t->name, recent_net->xt_recent); + + recent_net->xt_recent = NULL; + spin_unlock_bh(&recent_lock); + + remove_proc_entry("xt_recent", net->proc_net); } #else static inline int recent_proc_net_init(struct net *net) @@ -638,9 +666,6 @@ static int __net_init recent_net_init(struct net *net) static void __net_exit recent_net_exit(struct net *net) { - struct recent_net *recent_net = recent_pernet(net); - - BUG_ON(!list_empty(&recent_net->tables)); recent_proc_net_exit(net); } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 847d495..8a6c6ea 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1189,8 +1189,6 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; - u32 skip_addr4 = cb->args[2]; - u32 skip_addr6 = cb->args[3]; u32 iter_bkt; u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; @@ -1215,7 +1213,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < skip_addr4) + if (iter_addr4++ < cb->args[2]) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1231,7 +1229,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < skip_addr6) + if (iter_addr6++ < cb->args[3]) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1250,10 +1248,10 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, unlabel_staticlist_return: rcu_read_unlock(); - cb->args[0] = skip_bkt; - cb->args[1] = skip_chain; - cb->args[2] = skip_addr4; - cb->args[3] = skip_addr6; + cb->args[0] = iter_bkt; + cb->args[1] = iter_chain; + 
cb->args[2] = iter_addr4; + cb->args[3] = iter_addr6; return skb->len; } @@ -1273,12 +1271,9 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, { struct netlbl_unlhsh_walk_arg cb_arg; struct netlbl_unlhsh_iface *iface; - u32 skip_addr4 = cb->args[0]; - u32 skip_addr6 = cb->args[1]; - u32 iter_addr4 = 0; + u32 iter_addr4 = 0, iter_addr6 = 0; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) - u32 iter_addr6 = 0; struct netlbl_af6list *addr6; #endif @@ -1292,7 +1287,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, goto unlabel_staticlistdef_return; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < skip_addr4) + if (iter_addr4++ < cb->args[0]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, @@ -1305,7 +1300,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < skip_addr6) + if (iter_addr6++ < cb->args[1]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, @@ -1320,8 +1315,8 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, unlabel_staticlistdef_return: rcu_read_unlock(); - cb->args[0] = skip_addr4; - cb->args[1] = skip_addr6; + cb->args[0] = iter_addr4; + cb->args[1] = iter_addr6; return skb->len; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c0353d5..1e3fd5b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -248,11 +248,10 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) struct nl_portid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; - struct hlist_node *node; read_lock(&nl_table_lock); head = nl_portid_hashfn(hash, portid); - sk_for_each(sk, node, head) { + sk_for_each(sk, head) { if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; @@ -312,9 +311,9 @@ 
static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) for (i = 0; i <= omask; i++) { struct sock *sk; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; - sk_for_each_safe(sk, node, tmp, &otable[i]) + sk_for_each_safe(sk, tmp, &otable[i]) __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); } @@ -344,7 +343,6 @@ static void netlink_update_listeners(struct sock *sk) { struct netlink_table *tbl = &nl_table[sk->sk_protocol]; - struct hlist_node *node; unsigned long mask; unsigned int i; struct listeners *listeners; @@ -355,7 +353,7 @@ netlink_update_listeners(struct sock *sk) for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { mask = 0; - sk_for_each_bound(sk, node, &tbl->mc_list) { + sk_for_each_bound(sk, &tbl->mc_list) { if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) mask |= nlk_sk(sk)->groups[i]; } @@ -371,18 +369,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; - struct hlist_node *node; int len; netlink_table_grab(); head = nl_portid_hashfn(hash, portid); len = 0; - sk_for_each(osk, node, head) { + sk_for_each(osk, head) { if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) break; len++; } - if (node) + if (osk) goto err; err = -EBUSY; @@ -575,7 +572,6 @@ static int netlink_autobind(struct socket *sock) struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; - struct hlist_node *node; s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; @@ -584,7 +580,7 @@ retry: cond_resched(); netlink_table_grab(); head = nl_portid_hashfn(hash, portid); - sk_for_each(osk, node, head) { + sk_for_each(osk, head) { if (!net_eq(sock_net(osk), net)) continue; if (nlk_sk(osk)->portid == portid) { @@ -809,7 +805,7 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) struct sock *netlink_getsockbyfilp(struct file *filp) { - struct inode *inode = 
filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct sock *sock; if (!S_ISSOCK(inode->i_mode)) @@ -1101,7 +1097,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid { struct net *net = sock_net(ssk); struct netlink_broadcast_data info; - struct hlist_node *node; struct sock *sk; skb = netlink_trim(skb, allocation); @@ -1124,7 +1119,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid netlink_lock_table(); - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) + sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); consume_skb(skb); @@ -1200,7 +1195,6 @@ out: int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; - struct hlist_node *node; struct sock *sk; int ret = 0; @@ -1212,7 +1206,7 @@ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) read_lock(&nl_table_lock); - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) + sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) ret += do_one_set_err(sk, &info); read_unlock(&nl_table_lock); @@ -1676,10 +1670,9 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups) void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) { struct sock *sk; - struct hlist_node *node; struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; - sk_for_each_bound(sk, node, &tbl->mc_list) + sk_for_each_bound(sk, &tbl->mc_list) netlink_update_socket_mc(nlk_sk(sk), group, 0); } @@ -1974,14 +1967,13 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) struct nl_seq_iter *iter = seq->private; int i, j; struct sock *s; - struct hlist_node *node; loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { struct nl_portid_hash *hash = &nl_table[i].hash; for (j = 0; j <= hash->mask; j++) { - sk_for_each(s, node, &hash->table[j]) { + sk_for_each(s, &hash->table[j]) { if 
(sock_net(s) != seq_file_net(seq)) continue; if (off == pos) { @@ -2145,7 +2137,7 @@ static const struct net_proto_family netlink_family_ops = { static int __net_init netlink_net_init(struct net *net) { #ifdef CONFIG_PROC_FS - if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) + if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops)) return -ENOMEM; #endif return 0; @@ -2154,7 +2146,7 @@ static int __net_init netlink_net_init(struct net *net) static void __net_exit netlink_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "netlink"); + remove_proc_entry("netlink", net->proc_net); #endif } @@ -2185,7 +2177,6 @@ static struct pernet_operations __net_initdata netlink_net_ops = { static int __init netlink_proto_init(void) { - struct sk_buff *dummy_skb; int i; unsigned long limit; unsigned int order; @@ -2194,7 +2185,7 @@ static int __init netlink_proto_init(void) if (err != 0) goto out; - BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); if (!nl_table) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f2aabb6..5a55be3 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -142,6 +142,7 @@ int genl_register_mc_group(struct genl_family *family, int err = 0; BUG_ON(grp->name[0] == '\0'); + BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); genl_lock(); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7261eb8..103bd70 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -104,10 +104,9 @@ static void nr_remove_socket(struct sock *sk) static void nr_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) + sk_for_each(s, &nr_list) if (nr_sk(s)->device == dev) nr_disconnect(s, ENETUNREACH); 
spin_unlock_bh(&nr_list_lock); @@ -149,10 +148,9 @@ static void nr_insert_socket(struct sock *sk) static struct sock *nr_find_listener(ax25_address *addr) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) + sk_for_each(s, &nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && s->sk_state == TCP_LISTEN) { bh_lock_sock(s); @@ -170,10 +168,9 @@ found: static struct sock *nr_find_socket(unsigned char index, unsigned char id) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) { + sk_for_each(s, &nr_list) { struct nr_sock *nr = nr_sk(s); if (nr->my_index == index && nr->my_id == id) { @@ -194,10 +191,9 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id, ax25_address *dest) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) { + sk_for_each(s, &nr_list) { struct nr_sock *nr = nr_sk(s); if (nr->your_index == index && nr->your_id == id && @@ -1177,6 +1173,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, } if (sax != NULL) { + memset(sax, 0, sizeof(*sax)); sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); @@ -1452,9 +1449,9 @@ static int __init nr_proto_init(void) nr_loopback_init(); - proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops); - proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops); - proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops); + proc_create("nr", S_IRUGO, init_net.proc_net, &nr_info_fops); + proc_create("nr_neigh", S_IRUGO, init_net.proc_net, &nr_neigh_fops); + proc_create("nr_nodes", S_IRUGO, init_net.proc_net, &nr_nodes_fops); out: return rc; fail: @@ -1482,9 +1479,9 @@ static void __exit nr_exit(void) { int i; - proc_net_remove(&init_net, "nr"); - proc_net_remove(&init_net, "nr_neigh"); - proc_net_remove(&init_net, "nr_nodes"); + 
remove_proc_entry("nr", init_net.proc_net); + remove_proc_entry("nr_neigh", init_net.proc_net); + remove_proc_entry("nr_nodes", init_net.proc_net); nr_loopback_clear(); nr_rt_free(); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 70ffff7..b976d5e 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -49,10 +49,9 @@ static struct nr_node *nr_node_get(ax25_address *callsign) { struct nr_node *found = NULL; struct nr_node *nr_node; - struct hlist_node *node; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) + nr_node_for_each(nr_node, &nr_node_list) if (ax25cmp(callsign, &nr_node->callsign) == 0) { nr_node_hold(nr_node); found = nr_node; @@ -67,10 +66,9 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, { struct nr_neigh *found = NULL; struct nr_neigh *nr_neigh; - struct hlist_node *node; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(nr_neigh, node, &nr_neigh_list) + nr_neigh_for_each(nr_neigh, &nr_neigh_list) if (ax25cmp(callsign, &nr_neigh->callsign) == 0 && nr_neigh->dev == dev) { nr_neigh_hold(nr_neigh); @@ -114,10 +112,9 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, */ if (nr_neigh != NULL && nr_neigh->failed != 0 && quality == 0) { struct nr_node *nr_nodet; - struct hlist_node *node; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_nodet, node, &nr_node_list) { + nr_node_for_each(nr_nodet, &nr_node_list) { nr_node_lock(nr_nodet); for (i = 0; i < nr_nodet->count; i++) if (nr_nodet->routes[i].neighbour == nr_neigh) @@ -485,11 +482,11 @@ static int nr_dec_obs(void) { struct nr_neigh *nr_neigh; struct nr_node *s; - struct hlist_node *node, *nodet; + struct hlist_node *nodet; int i; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(s, node, nodet, &nr_node_list) { + nr_node_for_each_safe(s, nodet, &nr_node_list) { nr_node_lock(s); for (i = 0; i < s->count; i++) { switch (s->routes[i].obs_count) { @@ -540,15 +537,15 @@ static int 
nr_dec_obs(void) void nr_rt_device_down(struct net_device *dev) { struct nr_neigh *s; - struct hlist_node *node, *nodet, *node2, *node2t; + struct hlist_node *nodet, *node2t; struct nr_node *t; int i; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) { + nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { if (s->dev == dev) { spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node2, node2t, &nr_node_list) { + nr_node_for_each_safe(t, node2t, &nr_node_list) { nr_node_lock(t); for (i = 0; i < t->count; i++) { if (t->routes[i].neighbour == s) { @@ -737,11 +734,10 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg) void nr_link_failed(ax25_cb *ax25, int reason) { struct nr_neigh *s, *nr_neigh = NULL; - struct hlist_node *node; struct nr_node *nr_node = NULL; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(s, node, &nr_neigh_list) { + nr_neigh_for_each(s, &nr_neigh_list) { if (s->ax25 == ax25) { nr_neigh_hold(s); nr_neigh = s; @@ -761,7 +757,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) return; } spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) { + nr_node_for_each(nr_node, &nr_node_list) { nr_node_lock(nr_node); if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh) @@ -1013,16 +1009,16 @@ void __exit nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; - struct hlist_node *node, *nodet; + struct hlist_node *nodet; spin_lock_bh(&nr_neigh_list_lock); spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node, nodet, &nr_node_list) { + nr_node_for_each_safe(t, nodet, &nr_node_list) { nr_node_lock(t); nr_remove_node_locked(t); nr_node_unlock(t); } - nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) { + nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { while(s->count) { s->count--; nr_neigh_put(s); diff --git a/net/nfc/core.c b/net/nfc/core.c index aa64ea4..6ceee8e 100644 --- a/net/nfc/core.c +++ 
b/net/nfc/core.c @@ -338,7 +338,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) dev->active_target = target; dev->rf_mode = NFC_RF_INITIATOR; - if (dev->ops->check_presence) + if (dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } @@ -429,7 +429,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, rc = dev->ops->im_transceive(dev, dev->active_target, skb, cb, cb_context); - if (!rc && dev->ops->check_presence) + if (!rc && dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } else if (dev->rf_mode == NFC_RF_TARGET && dev->ops->tm_send != NULL) { @@ -684,11 +684,6 @@ static void nfc_release(struct device *d) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - if (dev->ops->check_presence) { - del_timer_sync(&dev->check_pres_timer); - cancel_work_sync(&dev->check_pres_work); - } - nfc_genl_data_exit(&dev->genl_data); kfree(dev->targets); kfree(dev); @@ -706,15 +701,16 @@ static void nfc_check_pres_work(struct work_struct *work) rc = dev->ops->check_presence(dev, dev->active_target); if (rc == -EOPNOTSUPP) goto exit; - if (!rc) { - mod_timer(&dev->check_pres_timer, jiffies + - msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); - } else { + if (rc) { u32 active_target_idx = dev->active_target->idx; device_unlock(&dev->dev); nfc_target_lost(dev, active_target_idx); return; } + + if (!dev->shutting_down) + mod_timer(&dev->check_pres_timer, jiffies + + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } exit: @@ -734,10 +730,10 @@ struct class nfc_class = { }; EXPORT_SYMBOL(nfc_class); -static int match_idx(struct device *d, void *data) +static int match_idx(struct device *d, const void *data) { struct nfc_dev *dev = to_nfc_dev(d); - unsigned int *idx = data; + const unsigned int *idx = data; return dev->idx == *idx; } @@ -761,6 +757,7 @@ struct nfc_dev 
*nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -778,6 +775,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; + dev->supported_se = supported_se; + dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; @@ -853,26 +852,27 @@ void nfc_unregister_device(struct nfc_dev *dev) id = dev->idx; - mutex_lock(&nfc_devlist_mutex); - nfc_devlist_generation++; - - /* lock to avoid unregistering a device while an operation - is in progress */ - device_lock(&dev->dev); - device_del(&dev->dev); - device_unlock(&dev->dev); + if (dev->ops->check_presence) { + device_lock(&dev->dev); + dev->shutting_down = true; + device_unlock(&dev->dev); + del_timer_sync(&dev->check_pres_timer); + cancel_work_sync(&dev->check_pres_work); + } - mutex_unlock(&nfc_devlist_mutex); + rc = nfc_genl_device_removed(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s " + "was removed\n", dev_name(&dev->dev)); nfc_llcp_unregister_device(dev); - rc = nfc_genl_device_removed(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s was removed\n", - dev_name(&dev->dev)); + mutex_lock(&nfc_devlist_mutex); + nfc_devlist_generation++; + device_del(&dev->dev); + mutex_unlock(&nfc_devlist_mutex); ida_simple_remove(&nfc_index_ida, id); - } EXPORT_SYMBOL(nfc_unregister_device); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 7d99410..64f922b 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -280,14 +280,19 @@ static int nfc_hci_delete_pipe(struct nfc_hci_dev *hdev, u8 pipe) static int nfc_hci_clear_all_pipes(struct nfc_hci_dev *hdev) { u8 param[2]; + size_t param_len = 2; /* TODO: Find out what the identity reference data is * and fill param with it. 
HCI spec 6.1.3.5 */ pr_debug("\n"); + if (test_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &hdev->quirks)) + param_len = 0; + return nfc_hci_execute_cmd(hdev, NFC_HCI_ADMIN_PIPE, - NFC_HCI_ADM_CLEAR_ALL_PIPE, param, 2, NULL); + NFC_HCI_ADM_CLEAR_ALL_PIPE, param, param_len, + NULL); } int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 7bea574..91020b2 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -57,6 +57,8 @@ static void nfc_hci_msg_tx_work(struct work_struct *work) int r = 0; mutex_lock(&hdev->msg_tx_mutex); + if (hdev->shutting_down) + goto exit; if (hdev->cmd_pending_msg) { if (timer_pending(&hdev->cmd_timer) == 0) { @@ -295,6 +297,12 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, goto exit; } + if (hdev->ops->event_received) { + r = hdev->ops->event_received(hdev, gate, event, skb); + if (r <= 0) + goto exit_noskb; + } + switch (event) { case NFC_HCI_EVT_TARGET_DISCOVERED: if (skb->len < 1) { /* no status data? */ @@ -320,17 +328,15 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, r = nfc_hci_target_discovered(hdev, gate); break; default: - if (hdev->ops->event_received) { - hdev->ops->event_received(hdev, gate, event, skb); - return; - } - + pr_info("Discarded unknown event %x to gate %x\n", event, gate); + r = -EINVAL; break; } exit: kfree_skb(skb); +exit_noskb: if (r) { /* TODO: There was an error dispatching the event, * how to propagate up to nfc core? 
@@ -669,8 +675,10 @@ static int hci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) if (hdev->ops->tm_send) return hdev->ops->tm_send(hdev, skb); - else - return -ENOTSUPP; + + kfree_skb(skb); + + return -ENOTSUPP; } static int hci_check_presence(struct nfc_dev *nfc_dev, @@ -787,7 +795,9 @@ static struct nfc_ops hci_nfc_ops = { struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, + unsigned long quirks, u32 protocols, + u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, @@ -813,7 +823,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { @@ -830,6 +840,8 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + hdev->quirks = quirks; + return hdev; } EXPORT_SYMBOL(nfc_hci_allocate_device); @@ -868,6 +880,28 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) { struct hci_msg *msg, *n; + mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->cmd_pending_msg) { + if (hdev->cmd_pending_msg->cb) + hdev->cmd_pending_msg->cb( + hdev->cmd_pending_msg->cb_context, + NULL, -ESHUTDOWN); + kfree(hdev->cmd_pending_msg); + hdev->cmd_pending_msg = NULL; + } + + hdev->shutting_down = true; + + mutex_unlock(&hdev->msg_tx_mutex); + + del_timer_sync(&hdev->cmd_timer); + cancel_work_sync(&hdev->msg_tx_work); + + cancel_work_sync(&hdev->msg_rx_work); + + nfc_unregister_device(hdev->ndev); + skb_queue_purge(&hdev->rx_hcp_frags); skb_queue_purge(&hdev->msg_rx_queue); @@ -876,13 +910,6 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) skb_queue_purge(&msg->msg_frags); kfree(msg); } - - del_timer_sync(&hdev->cmd_timer); - - nfc_unregister_device(hdev->ndev); - - 
cancel_work_sync(&hdev->msg_tx_work); - cancel_work_sync(&hdev->msg_rx_work); } EXPORT_SYMBOL(nfc_hci_unregister_device); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index bc308a7..b6b4109 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -105,6 +105,13 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, } mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->shutting_down) { + err = -ESHUTDOWN; + mutex_unlock(&hdev->msg_tx_mutex); + goto out_skb_err; + } + list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue); mutex_unlock(&hdev->msg_tx_mutex); diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c index df24be4..c6bc3bd 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp/commands.c @@ -304,6 +304,8 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); return nfc_data_exchange(dev, local->target_idx, skb, diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index ec43914..ee25f25 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -54,7 +54,6 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); - skb_queue_purge(&sock->tx_backlog_queue); if (local == NULL) return; @@ -69,17 +68,18 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) } } -static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) +static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, + int err) { struct sock *sk; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct nfc_llcp_sock *llcp_sock; skb_queue_purge(&local->tx_queue); write_lock(&local->sockets.lock); - sk_for_each_safe(sk, node, tmp, &local->sockets.head) { + sk_for_each_safe(sk, tmp, &local->sockets.head) { llcp_sock = nfc_llcp_sock(sk); bh_lock_sock(sk); @@ -101,11 +101,12 @@ static void nfc_llcp_socket_release(struct 
nfc_llcp_local *local, bool listen) nfc_llcp_accept_unlink(accept_sk); + if (err) + accept_sk->sk_err = err; accept_sk->sk_state = LLCP_CLOSED; + accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); - - sock_orphan(accept_sk); } if (listen == true) { @@ -124,16 +125,45 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) continue; } + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; + sk->sk_state_change(sk); bh_unlock_sock(sk); - sock_orphan(sk); - sk_del_node_init(sk); } write_unlock(&local->sockets.lock); + + /* + * If we want to keep the listening sockets alive, + * we don't touch the RAW ones. + */ + if (listen == true) + return; + + write_lock(&local->raw_sockets.lock); + + sk_for_each_safe(sk, tmp, &local->raw_sockets.head) { + llcp_sock = nfc_llcp_sock(sk); + + bh_lock_sock(sk); + + nfc_llcp_socket_purge(llcp_sock); + + if (err) + sk->sk_err = err; + sk->sk_state = LLCP_CLOSED; + sk->sk_state_change(sk); + + bh_unlock_sock(sk); + + sk_del_node_init(sk); + } + + write_unlock(&local->raw_sockets.lock); } struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) @@ -143,20 +173,25 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) return local; } -static void local_release(struct kref *ref) +static void local_cleanup(struct nfc_llcp_local *local, bool listen) { - struct nfc_llcp_local *local; - - local = container_of(ref, struct nfc_llcp_local, ref); - - list_del(&local->list); - nfc_llcp_socket_release(local, false); + nfc_llcp_socket_release(local, listen, ENXIO); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); +} + +static void local_release(struct kref *ref) +{ + struct nfc_llcp_local *local; + + local = container_of(ref, struct nfc_llcp_local, ref); + + list_del(&local->list); + local_cleanup(local, false); 
kfree(local); } @@ -172,7 +207,6 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, u8 ssap, u8 dsap) { struct sock *sk; - struct hlist_node *node; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("ssap dsap %d %d\n", ssap, dsap); @@ -184,7 +218,7 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, llcp_sock = NULL; - sk_for_each(sk, node, &local->sockets.head) { + sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) { @@ -273,7 +307,6 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, u8 *sn, size_t sn_len) { struct sock *sk; - struct hlist_node *node; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("sn %zd %p\n", sn_len, sn); @@ -285,7 +318,7 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, llcp_sock = NULL; - sk_for_each(sk, node, &local->sockets.head) { + sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); pr_debug("llcp sock %p\n", tmp_sock); @@ -550,14 +583,13 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) pr_err("No LLCP device\n"); return -ENODEV; } + if (gb_len < 3) + return -EINVAL; memset(local->remote_gb, 0, NFC_MAX_GT_LEN); memcpy(local->remote_gb, gb, gb_len); local->remote_gb_len = gb_len; - if (local->remote_gb == NULL || local->remote_gb_len == 0) - return -ENODEV; - if (memcmp(local->remote_gb, llcp_magic, 3)) { pr_err("MAC does not support LLCP\n"); return -EINVAL; @@ -603,14 +635,13 @@ static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu) void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, struct sk_buff *skb, u8 direction) { - struct hlist_node *node; struct sk_buff *skb_copy = NULL, *nskb; struct sock *sk; u8 *data; read_lock(&local->raw_sockets.lock); - sk_for_each(sk, node, &local->raw_sockets.head) { + sk_for_each(sk, &local->raw_sockets.head) { if (sk->sk_state != LLCP_BOUND) 
continue; @@ -668,6 +699,8 @@ static void nfc_llcp_tx_work(struct work_struct *work) if (ptype == LLCP_PDU_I) copy_skb = skb_copy(skb, GFP_ATOMIC); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); @@ -697,11 +730,10 @@ static struct nfc_llcp_sock *nfc_llcp_connecting_sock_get(struct nfc_llcp_local { struct sock *sk; struct nfc_llcp_sock *llcp_sock; - struct hlist_node *node; read_lock(&local->connecting_sockets.lock); - sk_for_each(sk, node, &local->connecting_sockets.head) { + sk_for_each(sk, &local->connecting_sockets.head) { llcp_sock = nfc_llcp_sock(sk); if (llcp_sock->ssap == ssap) { @@ -781,9 +813,14 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, /* There is no sequence with UI frames */ skb_pull(skb, LLCP_HEADER_SIZE); - if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) { - pr_err("receive queue is full\n"); - skb_queue_head(&llcp_sock->tx_backlog_queue, skb); + if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { + /* + * UI frames will be freed from the socket layer, so we + * need to keep them alive until someone receives them. + */ + skb_get(skb); + } else { + pr_err("Receive queue is full\n"); } nfc_llcp_sock_put(llcp_sock); @@ -976,9 +1013,14 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, pr_err("Received out of sequence I PDU\n"); skb_pull(skb, LLCP_HEADER_SIZE + LLCP_SEQUENCE_SIZE); - if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) { - pr_err("receive queue is full\n"); - skb_queue_head(&llcp_sock->tx_backlog_queue, skb); + if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { + /* + * I frames will be freed from the socket layer, so we + * need to keep them alive until someone receives them. 
+ */ + skb_get(skb); + } else { + pr_err("Receive queue is full\n"); } } @@ -1245,6 +1287,8 @@ static void nfc_llcp_rx_work(struct work_struct *work) print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); switch (ptype) { @@ -1296,6 +1340,13 @@ static void nfc_llcp_rx_work(struct work_struct *work) local->rx_pending = NULL; } +static void __nfc_llcp_recv(struct nfc_llcp_local *local, struct sk_buff *skb) +{ + local->rx_pending = skb; + del_timer(&local->link_timer); + schedule_work(&local->rx_work); +} + void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) { struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; @@ -1306,9 +1357,7 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) return; } - local->rx_pending = skb_get(skb); - del_timer(&local->link_timer); - schedule_work(&local->rx_work); + __nfc_llcp_recv(local, skb); } int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) @@ -1319,9 +1368,7 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) if (local == NULL) return -ENODEV; - local->rx_pending = skb_get(skb); - del_timer(&local->link_timer); - schedule_work(&local->rx_work); + __nfc_llcp_recv(local, skb); return 0; } @@ -1335,7 +1382,7 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev) return; /* Close and purge all existing sockets */ - nfc_llcp_socket_release(local, true); + nfc_llcp_socket_release(local, true, 0); } void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, @@ -1414,6 +1461,8 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev) return; } + local_cleanup(local, false); + nfc_llcp_local_put(local); } diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 0d62366..0eae5c5 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -121,7 +121,6 @@ struct nfc_llcp_sock { struct sk_buff_head tx_queue; struct sk_buff_head tx_pending_queue; - 
struct sk_buff_head tx_backlog_queue; struct list_head accept_queue; struct sock *parent; diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index fea22eb..6c94447 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -270,7 +270,9 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } if (sk->sk_state == LLCP_CONNECTED || !newsock) { - nfc_llcp_accept_unlink(sk); + list_del_init(&lsk->accept_queue); + sock_put(sk); + if (newsock) sock_graft(sk, newsock); @@ -278,6 +280,8 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, pr_debug("Returning sk state %d\n", sk->sk_state); + sk_acceptq_removed(parent); + return sk; } @@ -462,8 +466,6 @@ static int llcp_sock_release(struct socket *sock) nfc_llcp_accept_unlink(accept_sk); release_sock(accept_sk); - - sock_orphan(accept_sk); } } @@ -644,6 +646,8 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); + msg->msg_namelen = 0; + lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && @@ -672,25 +676,28 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied = min_t(unsigned int, rlen, len); cskb = skb; - if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { + if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; } + sock_recv_timestamp(msg, sk, skb); + if (sk->sk_type == SOCK_DGRAM && msg->msg_name) { struct nfc_llcp_ui_cb *ui_cb = nfc_llcp_ui_skb_cb(skb); - struct sockaddr_nfc_llcp sockaddr; + struct sockaddr_nfc_llcp *sockaddr = + (struct sockaddr_nfc_llcp *) msg->msg_name; - pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); + msg->msg_namelen = sizeof(struct sockaddr_nfc_llcp); - sockaddr.sa_family = AF_NFC; - sockaddr.nfc_protocol = NFC_PROTO_NFC_DEP; - sockaddr.dsap = ui_cb->dsap; - sockaddr.ssap = ui_cb->ssap; + pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); - memcpy(msg->msg_name, &sockaddr, 
sizeof(sockaddr)); - msg->msg_namelen = sizeof(sockaddr); + memset(sockaddr, 0, sizeof(*sockaddr)); + sockaddr->sa_family = AF_NFC; + sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP; + sockaddr->dsap = ui_cb->dsap; + sockaddr->ssap = ui_cb->ssap; } /* Mark read part of skb as used */ @@ -806,7 +813,6 @@ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) llcp_sock->reserved_ssap = LLCP_SAP_MAX; skb_queue_head_init(&llcp_sock->tx_queue); skb_queue_head_init(&llcp_sock->tx_pending_queue); - skb_queue_head_init(&llcp_sock->tx_backlog_queue); INIT_LIST_HEAD(&llcp_sock->accept_queue); if (sock != NULL) @@ -821,7 +827,6 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); - skb_queue_purge(&sock->tx_backlog_queue); list_del_init(&sock->accept_queue); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 5f98dc1..48ada0e 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -658,6 +658,7 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, + __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -680,6 +681,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, + supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 3568ae1..504b883 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -366,6 +366,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || + nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, 
dev->rf_mode)) goto nla_put_failure; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ac2defe..d4d5363 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -58,7 +58,7 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *current_tci = vhdr->h_vlan_TCI; @@ -115,7 +115,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); } __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f996db3..6980c3e 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -158,11 +158,10 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp, struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; - struct hlist_node *n; struct hlist_head *head; head = vport_hash_bucket(dp, port_no); - hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { + hlist_for_each_entry_rcu(vport, head, dp_hash_node) { if (vport->port_no == port_no) return vport; } @@ -301,7 +300,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, struct sk_buff *segs, *nskb; int err; - segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); if (IS_ERR(segs)) return PTR_ERR(segs); @@ -395,6 +394,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, skb_copy_and_csum_dev(skb, nla_data(nla)); + genlmsg_end(user_skb, upcall); err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: @@ -1386,9 
+1386,9 @@ static void __dp_destroy(struct datapath *dp) for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; - struct hlist_node *node, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node) + hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) if (vport->port_no != OVSP_LOCAL) ovs_dp_detach_port(vport); } @@ -1593,10 +1593,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, return ERR_PTR(-ENOMEM); retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } + BUG_ON(retval < 0); + return skb; } @@ -1691,6 +1689,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; + err = 0; reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { @@ -1725,24 +1724,32 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) err = -EINVAL; + reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!reply) { + err = -ENOMEM; + goto exit_unlock; + } + if (!err && a[OVS_VPORT_ATTR_OPTIONS]) err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_unlock; + goto exit_free; + if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, - ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + rtnl_unlock(); + return 0; + +exit_free: + 
kfree_skb(reply); exit_unlock: rtnl_unlock(); return err; @@ -1772,6 +1779,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(reply)) goto exit_unlock; + err = 0; ovs_dp_detach_port(vport); genl_notify(reply, genl_info_net(info), info->snd_portid, @@ -1825,10 +1833,9 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; - struct hlist_node *n; j = 0; - hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).portid, @@ -1989,10 +1996,9 @@ static struct pernet_operations ovs_net_ops = { static int __init dp_init(void) { - struct sk_buff *dummy_skb; int err; - BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); pr_info("Open vSwitch switching datapath\n"); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c3294ce..67a2b78 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -299,10 +299,10 @@ void ovs_flow_tbl_destroy(struct flow_table *table) for (i = 0; i < table->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head = flex_array_get(table->buckets, i); - struct hlist_node *node, *n; + struct hlist_node *n; int ver = table->node_ver; - hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) { + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { hlist_del_rcu(&flow->hash_node[ver]); ovs_flow_free(flow); } @@ -332,7 +332,6 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la { struct sw_flow *flow; struct hlist_head *head; - struct hlist_node *n; int ver; int i; @@ -340,7 +339,7 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la while (*bucket < 
table->n_buckets) { i = 0; head = flex_array_get(table->buckets, *bucket); - hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { if (i < *last) { i++; continue; @@ -367,11 +366,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new for (i = 0; i < old->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head; - struct hlist_node *n; head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, n, head, hash_node[old_ver]) + hlist_for_each_entry(flow, head, hash_node[old_ver]) ovs_flow_tbl_insert(new, flow); } old->keep_flows = true; @@ -484,7 +482,11 @@ static __be16 parse_ethertype(struct sk_buff *skb) return htons(ETH_P_802_2); __skb_pull(skb, sizeof(struct llc_snap_hdr)); - return llc->ethertype; + + if (ntohs(llc->ethertype) >= 1536) + return llc->ethertype; + + return htons(ETH_P_802_2); } static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, @@ -766,14 +768,13 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, struct sw_flow_key *key, int key_len) { struct sw_flow *flow; - struct hlist_node *n; struct hlist_head *head; u32 hash; hash = ovs_flow_hash(key, key_len); head = find_bucket(table, hash); - hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) { + hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { if (flow->hash == hash && !memcmp(&flow->key, key, key_len)) { @@ -794,9 +795,9 @@ void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { + BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[table->node_ver]); table->count--; - BUG_ON(table->count < 0); } /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 5d460c3..0531de6 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -69,7 +69,6 @@ static int internal_dev_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - dev->addr_assign_type &= ~NET_ADDR_RANDOM; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } @@ -98,7 +97,7 @@ static int internal_dev_stop(struct net_device *netdev) static void internal_dev_getinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "openvswitch"); + strlcpy(info->driver, "openvswitch", sizeof(info->driver)); } static const struct ethtool_ops internal_dev_ethtool_ops = { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index a9327e2..2130d61 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -35,21 +35,25 @@ /* Must be called with rcu_read_lock. */ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) { - if (unlikely(!vport)) { - kfree_skb(skb); - return; - } + if (unlikely(!vport)) + goto error; + + if (unlikely(skb_warn_if_lro(skb))) + goto error; /* Make our own copy of the packet. Otherwise we will mangle the * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). - * (No one comes after us, since we tell handle_bridge() that we took - * the packet.) */ + */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return; skb_push(skb, ETH_HLEN); ovs_vport_receive(vport, skb); + return; + +error: + kfree_skb(skb); } /* Called with rcu_read_lock and bottom-halves disabled. 
*/ @@ -169,9 +173,6 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) goto error; } - if (unlikely(skb_warn_if_lro(skb))) - goto error; - skb->dev = netdev_vport->dev; len = skb->len; dev_queue_xmit(skb); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 70af0be..f6b8132 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -86,9 +86,8 @@ struct vport *ovs_vport_locate(struct net *net, const char *name) { struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; - struct hlist_node *node; - hlist_for_each_entry_rcu(vport, node, bucket, hash_node) + hlist_for_each_entry_rcu(vport, bucket, hash_node) if (!strcmp(name, vport->ops->get_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; @@ -326,8 +325,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * @skb: skb that was received * * Must be called with rcu_read_lock. The packet cannot be shared and - * skb->data should point to the Ethernet header. The caller must have already - * called compute_ip_summed() to initialize the checksumming fields. + * skb->data should point to the Ethernet header. 
*/ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e639645..1d6793d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2361,13 +2361,15 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); - memset(&req_u, 0, sizeof(req_u)); - - if (po->rx_ring.pg_vec) + if (po->rx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); + } - if (po->tx_ring.pg_vec) + if (po->tx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); + } fanout_release(sk); @@ -3261,12 +3263,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) { struct sock *sk; - struct hlist_node *node; struct net_device *dev = data; struct net *net = dev_net(dev); rcu_read_lock(); - sk_for_each_rcu(sk, node, &net->packet.sklist) { + sk_for_each_rcu(sk, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -3826,7 +3827,7 @@ static int __net_init packet_net_init(struct net *net) mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); - if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) + if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops)) return -ENOMEM; return 0; @@ -3834,7 +3835,7 @@ static int __net_init packet_net_init(struct net *net) static void __net_exit packet_net_exit(struct net *net) { - proc_net_remove(net, "packet"); + remove_proc_entry("packet", net->proc_net); } static struct pernet_operations packet_net_ops = { diff --git a/net/packet/diag.c b/net/packet/diag.c index 8db6e21..d3fcd1e 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -172,13 +172,12 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct packet_diag_req *req; struct net *net; struct sock *sk; - struct hlist_node *node; net = 
sock_net(skb->sk); req = nlmsg_data(cb->nlh); mutex_lock(&net->packet.sklist_lock); - sk_for_each(sk, node, &net->packet.sklist) { + sk_for_each(sk, &net->packet.sklist) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 576f22c..e774117 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -640,11 +640,10 @@ static struct sock *pep_find_pipe(const struct hlist_head *hlist, const struct sockaddr_pn *dst, u8 pipe_handle) { - struct hlist_node *node; struct sock *sknode; u16 dobj = pn_sockaddr_get_object(dst); - sk_for_each(sknode, node, hlist) { + sk_for_each(sknode, hlist) { struct pep_sock *pnnode = pep_sk(sknode); /* Ports match, but addresses might not: */ diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 5bf6341..45a7df6 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -320,7 +320,7 @@ static int __net_init phonet_init_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); - if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops)) + if (!proc_create("phonet", 0, net->proc_net, &pn_sock_seq_fops)) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); @@ -331,7 +331,7 @@ static int __net_init phonet_init_net(struct net *net) static void __net_exit phonet_exit_net(struct net *net) { - proc_net_remove(net, "phonet"); + remove_proc_entry("phonet", net->proc_net); } static struct pernet_operations phonet_net_ops = { @@ -348,7 +348,7 @@ int __init phonet_device_init(void) if (err) return err; - proc_net_fops_create(&init_net, "pnresource", 0, &pn_res_seq_fops); + proc_create("pnresource", 0, init_net.proc_net, &pn_res_seq_fops); register_netdevice_notifier(&phonet_device_notifier); err = phonet_netlink_register(); if (err) @@ -361,7 +361,7 @@ void phonet_device_exit(void) rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); unregister_pernet_subsys(&phonet_net_ops); - proc_net_remove(&init_net, "pnresource"); + 
remove_proc_entry("pnresource", init_net.proc_net); } int phonet_route_add(struct net_device *dev, u8 daddr) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index b7e9827..1afd138 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -76,7 +76,6 @@ static struct hlist_head *pn_hash_list(u16 obj) */ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) { - struct hlist_node *node; struct sock *sknode; struct sock *rval = NULL; u16 obj = pn_sockaddr_get_object(spn); @@ -84,7 +83,7 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) struct hlist_head *hlist = pn_hash_list(obj); rcu_read_lock(); - sk_for_each_rcu(sknode, node, hlist) { + sk_for_each_rcu(sknode, hlist) { struct pn_sock *pn = pn_sk(sknode); BUG_ON(!pn->sobject); /* unbound socket */ @@ -120,10 +119,9 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb) rcu_read_lock(); for (h = 0; h < PN_HASHSIZE; h++) { - struct hlist_node *node; struct sock *sknode; - sk_for_each(sknode, node, hlist) { + sk_for_each(sknode, hlist) { struct sk_buff *clone; if (!net_eq(sock_net(sknode), net)) @@ -543,12 +541,11 @@ static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); struct hlist_head *hlist = pnsocks.hlist; - struct hlist_node *node; struct sock *sknode; unsigned int h; for (h = 0; h < PN_HASHSIZE; h++) { - sk_for_each_rcu(sknode, node, hlist) { + sk_for_each_rcu(sknode, hlist) { if (!net_eq(net, sock_net(sknode))) continue; if (!pos) diff --git a/net/rds/Kconfig b/net/rds/Kconfig index ec753b3..f2c670b 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -1,7 +1,7 @@ config RDS - tristate "The RDS Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "The RDS Protocol" + depends on INET ---help--- The RDS (Reliable Datagram Sockets) protocol provides reliable, sequenced delivery of datagrams over Infiniband, iWARP, diff --git a/net/rds/bind.c b/net/rds/bind.c 
index 637bde5..b5ad65a 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -52,13 +52,12 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port, struct rds_sock *insert) { struct rds_sock *rs; - struct hlist_node *node; struct hlist_head *head = hash_to_bucket(addr, port); u64 cmp; u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); rcu_read_lock(); - hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) { + hlist_for_each_entry_rcu(rs, head, rs_bound_node) { cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | be16_to_cpu(rs->rs_bound_port); diff --git a/net/rds/connection.c b/net/rds/connection.c index 9e07c75..642ad42 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -69,9 +69,8 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head, struct rds_transport *trans) { struct rds_connection *conn, *ret = NULL; - struct hlist_node *pos; - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (conn->c_faddr == faddr && conn->c_laddr == laddr && conn->c_trans == trans) { ret = conn; @@ -376,7 +375,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, int want_send) { struct hlist_head *head; - struct hlist_node *pos; struct list_head *list; struct rds_connection *conn; struct rds_message *rm; @@ -390,7 +388,7 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (want_send) list = &conn->c_send_queue; else @@ -439,7 +437,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, { uint64_t buffer[(item_len + 7) / 8]; struct hlist_head *head; - struct hlist_node *pos; struct rds_connection *conn; size_t i; @@ -450,7 +447,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, for (i = 0, 
head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { /* XXX no c_lock usage.. */ if (!visitor(conn, buffer)) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index a1e1162..31b74f5 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -434,12 +434,11 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event) version = RDS_PROTOCOL_3_0; while ((common >>= 1) != 0) version++; - } - printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using " - "incompatible protocol version %u.%u\n", - &dp->dp_saddr, - dp->dp_protocol_major, - dp->dp_protocol_minor); + } else + printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n", + &dp->dp_saddr, + dp->dp_protocol_major, + dp->dp_protocol_minor); return version; } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 8c5bc85..8eb9501 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -339,8 +339,8 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, sge->length = sizeof(struct rds_header); sge = &recv->r_sge[1]; - sge->addr = sg_dma_address(&recv->r_frag->f_sg); - sge->length = sg_dma_len(&recv->r_frag->f_sg); + sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg); + sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg); ret = 0; out: @@ -381,7 +381,10 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill) ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, recv->r_ibinc, sg_page(&recv->r_frag->f_sg), - (long) sg_dma_address(&recv->r_frag->f_sg), ret); + (long) ib_sg_dma_address( + ic->i_cm_id->device, + &recv->r_frag->f_sg), + ret); if (ret) { rds_ib_conn_error(conn, "recv post on " "%pI4 returned %d, disconnecting and " diff --git a/net/rds/message.c b/net/rds/message.c index f0a4658..aba232f 100644 
--- a/net/rds/message.c +++ b/net/rds/message.c @@ -82,10 +82,7 @@ static void rds_message_purge(struct rds_message *rm) void rds_message_put(struct rds_message *rm) { rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); - if (atomic_read(&rm->m_refcount) == 0) { -printk(KERN_CRIT "danger refcount zero on %p\n", rm); -WARN_ON(1); - } + WARN(!atomic_read(&rm->m_refcount), "danger refcount zero on %p\n", rm); if (atomic_dec_and_test(&rm->m_refcount)) { BUG_ON(!list_empty(&rm->m_sock_item)); BUG_ON(!list_empty(&rm->m_conn_item)); @@ -197,6 +194,9 @@ struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp) { struct rds_message *rm; + if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message)) + return NULL; + rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp); if (!rm) goto out; diff --git a/net/rds/stats.c b/net/rds/stats.c index 7be790d..73be187 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -87,6 +87,7 @@ void rds_stats_info_copy(struct rds_info_iterator *iter, for (i = 0; i < nr; i++) { BUG_ON(strlen(names[i]) >= sizeof(ctr.name)); strncpy(ctr.name, names[i], sizeof(ctr.name) - 1); + ctr.name[sizeof(ctr.name) - 1] = '\0'; ctr.value = values[i]; rds_info_copy(iter, &ctr, sizeof(ctr)); diff --git a/net/rfkill/input.c b/net/rfkill/input.c index c9d931e..b85107b 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -148,11 +148,9 @@ static unsigned long rfkill_ratelimit(const unsigned long last) static void rfkill_schedule_ratelimited(void) { - if (delayed_work_pending(&rfkill_op_work)) - return; - schedule_delayed_work(&rfkill_op_work, - rfkill_ratelimit(rfkill_last_scheduled)); - rfkill_last_scheduled = jiffies; + if (schedule_delayed_work(&rfkill_op_work, + rfkill_ratelimit(rfkill_last_scheduled))) + rfkill_last_scheduled = jiffies; } static void rfkill_schedule_global_op(enum rfkill_sched_op op) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index c4719ce..9c83474 100644 --- a/net/rose/af_rose.c +++ 
b/net/rose/af_rose.c @@ -165,10 +165,9 @@ static void rose_remove_socket(struct sock *sk) void rose_kill_by_neigh(struct rose_neigh *neigh) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->neighbour == neigh) { @@ -186,10 +185,9 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) static void rose_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->device == dev) { @@ -246,10 +244,9 @@ static void rose_insert_socket(struct sock *sk) static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && @@ -258,7 +255,7 @@ static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) goto found; } - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && @@ -278,10 +275,9 @@ found: struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->lci == lci && rose->neighbour == neigh) @@ -1257,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (srose != NULL) { + memset(srose, 0, msg->msg_namelen); srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; @@ -1575,10 +1572,13 @@ static int __init 
rose_proto_init(void) rose_add_loopback_neigh(); - proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops); - proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops); - proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops); - proc_net_fops_create(&init_net, "rose_routes", S_IRUGO, &rose_routes_fops); + proc_create("rose", S_IRUGO, init_net.proc_net, &rose_info_fops); + proc_create("rose_neigh", S_IRUGO, init_net.proc_net, + &rose_neigh_fops); + proc_create("rose_nodes", S_IRUGO, init_net.proc_net, + &rose_nodes_fops); + proc_create("rose_routes", S_IRUGO, init_net.proc_net, + &rose_routes_fops); out: return rc; fail: @@ -1605,10 +1605,10 @@ static void __exit rose_exit(void) { int i; - proc_net_remove(&init_net, "rose"); - proc_net_remove(&init_net, "rose_neigh"); - proc_net_remove(&init_net, "rose_nodes"); - proc_net_remove(&init_net, "rose_routes"); + remove_proc_entry("rose", init_net.proc_net); + remove_proc_entry("rose_neigh", init_net.proc_net); + remove_proc_entry("rose_nodes", init_net.proc_net); + remove_proc_entry("rose_routes", init_net.proc_net); rose_loopback_clear(); rose_rt_free(); diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 0d3103c..23dcef1 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -4,7 +4,7 @@ config AF_RXRPC tristate "RxRPC session sockets" - depends on INET && EXPERIMENTAL + depends on INET select CRYPTO select KEYS help diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 05996d0..e61aa60 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -10,6 +10,7 @@ */ #include <linux/module.h> +#include <linux/kernel.h> #include <linux/net.h> #include <linux/slab.h> #include <linux/skbuff.h> @@ -792,10 +793,9 @@ static const struct net_proto_family rxrpc_family_ops = { */ static int __init af_rxrpc_init(void) { - struct sk_buff *dummy_skb; int ret = -1; - BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct 
rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); rxrpc_epoch = htonl(get_seconds()); @@ -839,8 +839,9 @@ static int __init af_rxrpc_init(void) } #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops); - proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops); + proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops); + proc_create("rxrpc_conns", 0, init_net.proc_net, + &rxrpc_connection_seq_fops); #endif return 0; @@ -878,8 +879,8 @@ static void __exit af_rxrpc_exit(void) _debug("flush scheduled work"); flush_workqueue(rxrpc_workqueue); - proc_net_remove(&init_net, "rxrpc_conns"); - proc_net_remove(&init_net, "rxrpc_calls"); + remove_proc_entry("rxrpc_conns", init_net.proc_net); + remove_proc_entry("rxrpc_calls", init_net.proc_net); destroy_workqueue(rxrpc_workqueue); kmem_cache_destroy(rxrpc_call_jar); _leave(""); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 65d240c..8579c4b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -485,8 +485,9 @@ errout: return err; } -struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind) +struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, + int bind) { struct tc_action *a; struct tc_action_ops *a_o; @@ -542,9 +543,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, /* backward compatibility for policer */ if (name == NULL) - err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, a, ovr, bind); else - err = a_o->init(nla, est, a, ovr, bind); + err = a_o->init(net, nla, est, a, ovr, bind); if (err < 0) goto err_free; @@ -566,8 +567,9 @@ err_out: return ERR_PTR(err); } -struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind) +struct tc_action *tcf_action_init(struct net *net, struct 
nlattr *nla, + struct nlattr *est, char *name, int ovr, + int bind) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *head = NULL, *act, *act_prev = NULL; @@ -579,7 +581,7 @@ struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, return ERR_PTR(err); for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(tb[i], est, name, ovr, bind); + act = tcf_action_init_1(net, tb[i], est, name, ovr, bind); if (IS_ERR(act)) goto err; act->order = i; @@ -960,7 +962,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, struct tc_action *a; u32 seq = n->nlmsg_seq; - act = tcf_action_init(nla, NULL, NULL, ovr, 0); + act = tcf_action_init(net, nla, NULL, NULL, ovr, 0); if (act == NULL) goto done; if (IS_ERR(act)) { diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 2c8ad7c..08fa1e8 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -51,7 +51,7 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; -static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, +static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { struct nlattr *tb[TCA_CSUM_MAX + 1]; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 05d60859..fd2b3cf 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -58,8 +58,9 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { [TCA_GACT_PROB] = { .len = sizeof(struct tc_gact_p) }, }; -static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_gact_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_GACT_MAX + 1]; struct tc_gact *parm; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 58fb3c7..e0f6de6 100644 --- a/net/sched/act_ipt.c +++ 
b/net/sched/act_ipt.c @@ -102,7 +102,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, }; -static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, +static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { struct nlattr *tb[TCA_IPT_MAX + 1]; @@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_ipt *ipt = a->priv; struct xt_action_param par; - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - } + if (skb_unclone(skb, GFP_ATOMIC)) + return TC_ACT_UNSPEC; spin_lock(&ipt->tcf_lock); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 9c0fd0c..5d676ed 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -62,8 +62,9 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, }; -static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_mirred_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, int ovr, + int bind) { struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tc_mirred *parm; @@ -88,7 +89,7 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; } if (parm->ifindex) { - dev = __dev_get_by_index(&init_net, parm->ifindex); + dev = __dev_get_by_index(net, parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index b5d029e..876f0ef 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -44,7 +44,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, }; -static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, +static int tcf_nat_init(struct net *net, struct 
nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { struct nlattr *tb[TCA_NAT_MAX + 1]; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 45c53ab..7ed78c9 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -38,8 +38,9 @@ static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, }; -static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_pedit_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_PEDIT_MAX + 1]; struct tc_pedit *parm; @@ -130,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, int i, munged = 0; unsigned int off; - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; off = skb_network_offset(skb); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index a9de232..823463a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,8 +22,23 @@ #include <net/act_api.h> #include <net/netlink.h> -#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L) -#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L) +struct tcf_police { + struct tcf_common common; + int tcfp_result; + u32 tcfp_ewma_rate; + s64 tcfp_burst; + u32 tcfp_mtu; + s64 tcfp_toks; + s64 tcfp_ptoks; + s64 tcfp_mtu_ptoks; + s64 tcfp_t_c; + struct psched_ratecfg rate; + bool rate_present; + struct psched_ratecfg peak; + bool peak_present; +}; +#define to_police(pc) \ + container_of(pc, struct tcf_police, common) #define POL_TAB_MASK 15 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; @@ -108,10 +123,6 @@ static void tcf_police_destroy(struct tcf_police *p) write_unlock_bh(&police_lock); gen_kill_estimator(&p->tcf_bstats, &p->tcf_rate_est); - if (p->tcfp_R_tab) - qdisc_put_rtab(p->tcfp_R_tab); - if (p->tcfp_P_tab) - 
qdisc_put_rtab(p->tcfp_P_tab); /* * gen_estimator est_timer() might access p->tcf_lock * or bstats, wait a RCU grace period before freeing p @@ -130,8 +141,9 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_RESULT] = { .type = NLA_U32 }, }; -static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_act_police_locate(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { unsigned int h; int ret = 0, err; @@ -211,26 +223,36 @@ override: } /* No failure allowed after this point */ - if (R_tab != NULL) { - qdisc_put_rtab(police->tcfp_R_tab); - police->tcfp_R_tab = R_tab; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (R_tab) + police->tcfp_mtu = 255 << R_tab->rate.cell_log; + } + if (R_tab) { + police->rate_present = true; + psched_ratecfg_precompute(&police->rate, R_tab->rate.rate); + qdisc_put_rtab(R_tab); + } else { + police->rate_present = false; } - if (P_tab != NULL) { - qdisc_put_rtab(police->tcfp_P_tab); - police->tcfp_P_tab = P_tab; + if (P_tab) { + police->peak_present = true; + psched_ratecfg_precompute(&police->peak, P_tab->rate.rate); + qdisc_put_rtab(P_tab); + } else { + police->peak_present = false; } if (tb[TCA_POLICE_RESULT]) police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); - police->tcfp_toks = police->tcfp_burst = parm->burst; - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) { - police->tcfp_mtu = ~0; - if (police->tcfp_R_tab) - police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; + police->tcfp_burst = PSCHED_TICKS2NS(parm->burst); + police->tcfp_toks = police->tcfp_burst; + if (police->peak_present) { + police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak, + police->tcfp_mtu); + police->tcfp_ptoks = police->tcfp_mtu_ptoks; } - if (police->tcfp_P_tab) - police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); 
police->tcf_action = parm->action; if (tb[TCA_POLICE_AVRATE]) @@ -240,7 +262,7 @@ override: if (ret != ACT_P_CREATED) return ret; - police->tcfp_t_c = psched_get_time(); + police->tcfp_t_c = ktime_to_ns(ktime_get()); police->tcf_index = parm->index ? parm->index : tcf_hash_new_index(&police_idx_gen, &police_hash_info); h = tcf_hash(police->tcf_index, POL_TAB_MASK); @@ -286,9 +308,9 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_police *police = a->priv; - psched_time_t now; - long toks; - long ptoks = 0; + s64 now; + s64 toks; + s64 ptoks = 0; spin_lock(&police->tcf_lock); @@ -304,24 +326,25 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, } if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { - if (police->tcfp_R_tab == NULL) { + if (!police->rate_present) { spin_unlock(&police->tcf_lock); return police->tcfp_result; } - now = psched_get_time(); - toks = psched_tdiff_bounded(now, police->tcfp_t_c, - police->tcfp_burst); - if (police->tcfp_P_tab) { + now = ktime_to_ns(ktime_get()); + toks = min_t(s64, now - police->tcfp_t_c, + police->tcfp_burst); + if (police->peak_present) { ptoks = toks + police->tcfp_ptoks; - if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) - ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, qdisc_pkt_len(skb)); + if (ptoks > police->tcfp_mtu_ptoks) + ptoks = police->tcfp_mtu_ptoks; + ptoks -= (s64) psched_l2t_ns(&police->peak, + qdisc_pkt_len(skb)); } toks += police->tcfp_toks; - if (toks > (long)police->tcfp_burst) + if (toks > police->tcfp_burst) toks = police->tcfp_burst; - toks -= L2T(police, qdisc_pkt_len(skb)); + toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb)); if ((toks|ptoks) >= 0) { police->tcfp_t_c = now; police->tcfp_toks = toks; @@ -347,15 +370,15 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) .index = police->tcf_index, .action = police->tcf_action, .mtu = 
police->tcfp_mtu, - .burst = police->tcfp_burst, + .burst = PSCHED_NS2TICKS(police->tcfp_burst), .refcnt = police->tcf_refcnt - ref, .bindcnt = police->tcf_bindcnt - bind, }; - if (police->tcfp_R_tab) - opt.rate = police->tcfp_R_tab->rate; - if (police->tcfp_P_tab) - opt.peakrate = police->tcfp_P_tab->rate; + if (police->rate_present) + opt.rate.rate = psched_ratecfg_getrate(&police->rate); + if (police->peak_present) + opt.peakrate.rate = psched_ratecfg_getrate(&police->peak); if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (police->tcfp_result && diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 3714f60..7725eb4 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -95,8 +95,9 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { [TCA_DEF_DATA] = { .type = NLA_STRING, .len = SIMP_MAX_DATA }, }; -static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_simp_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_DEF_MAX + 1]; struct tc_defact *parm; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 476e0fa..cb42211 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -67,8 +67,9 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, }; -static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_skbedit_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tc_skbedit *parm; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ff55ed6..964f5e4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -321,7 +321,7 @@ replay: } } - err = tp->ops->change(skb, tp, cl, t->tcm_handle, 
tca, &fh); + err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh); if (err == 0) { if (tp_created) { spin_lock_bh(root_lock); @@ -508,7 +508,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_destroy); -int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, +int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, const struct tcf_ext_map *map) { @@ -519,7 +519,7 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, struct tc_action *act; if (map->police && tb[map->police]) { - act = tcf_action_init_1(tb[map->police], rate_tlv, + act = tcf_action_init_1(net, tb[map->police], rate_tlv, "police", TCA_ACT_NOREPLACE, TCA_ACT_BIND); if (IS_ERR(act)) @@ -528,8 +528,9 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, act->type = TCA_OLD_COMPAT; exts->action = act; } else if (map->action && tb[map->action]) { - act = tcf_action_init(tb[map->action], rate_tlv, NULL, - TCA_ACT_NOREPLACE, TCA_ACT_BIND); + act = tcf_action_init(net, tb[map->action], rate_tlv, + NULL, TCA_ACT_NOREPLACE, + TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 344a11b..d76a35d 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -132,15 +132,16 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, }; -static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, - unsigned long base, struct nlattr **tb, +static int basic_set_parms(struct net *net, struct tcf_proto *tp, + struct basic_filter *f, unsigned long base, + struct nlattr **tb, struct nlattr *est) { int err = -EINVAL; struct tcf_exts e; struct tcf_ematch_tree t; - err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &basic_ext_map); if (err < 0) return err; @@ -162,7 
+163,7 @@ errout: return err; } -static int basic_change(struct sk_buff *in_skb, +static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { @@ -182,7 +183,7 @@ static int basic_change(struct sk_buff *in_skb, if (f != NULL) { if (handle && f->handle != handle) return -EINVAL; - return basic_set_parms(tp, f, base, tb, tca[TCA_RATE]); + return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); } err = -ENOBUFS; @@ -208,7 +209,7 @@ static int basic_change(struct sk_buff *in_skb, f->handle = head->hgenerator; } - err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE]); + err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); if (err < 0) goto errout; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 6db7855..3a294eb 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -178,7 +178,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; -static int cls_cgroup_change(struct sk_buff *in_skb, +static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) @@ -215,7 +215,8 @@ static int cls_cgroup_change(struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, + &cgroup_ext_map); if (err < 0) return err; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index ce82d0c..aa36a8c 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -351,7 +351,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static int flow_change(struct sk_buff *in_skb, +static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr 
**tca, unsigned long *arg) @@ -397,7 +397,7 @@ static int flow_change(struct sk_buff *in_skb, return -EOPNOTSUPP; } - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &flow_ext_map); if (err < 0) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 4075a0a..9b97172 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -192,7 +192,7 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { }; static int -fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, +fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, struct nlattr **tca, unsigned long base) { struct fw_head *head = (struct fw_head *)tp->root; @@ -200,11 +200,10 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, u32 mask; int err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &fw_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &fw_ext_map); if (err < 0) return err; - err = -EINVAL; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); tcf_bind_filter(tp, &f->res, base); @@ -218,6 +217,7 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ + err = -EINVAL; if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) @@ -233,7 +233,7 @@ errout: return err; } -static int fw_change(struct sk_buff *in_skb, +static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -255,7 +255,7 @@ static int fw_change(struct sk_buff *in_skb, if (f != NULL) { if (f->id != handle && handle) return -EINVAL; - return fw_change_attrs(tp, f, tb, tca, base); + return fw_change_attrs(net, tp, f, tb, tca, base); } if (!handle) @@ -282,7 +282,7 @@ static int fw_change(struct sk_buff *in_skb, f->id = handle; - err = fw_change_attrs(tp, f, tb, tca, base); + err = fw_change_attrs(net, 
tp, f, tb, tca, base); if (err < 0) goto errout; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index c10d57b..37da567 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -335,9 +335,10 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { [TCA_ROUTE4_IIF] = { .type = NLA_U32 }, }; -static int route4_set_parms(struct tcf_proto *tp, unsigned long base, - struct route4_filter *f, u32 handle, struct route4_head *head, - struct nlattr **tb, struct nlattr *est, int new) +static int route4_set_parms(struct net *net, struct tcf_proto *tp, + unsigned long base, struct route4_filter *f, + u32 handle, struct route4_head *head, + struct nlattr **tb, struct nlattr *est, int new) { int err; u32 id = 0, to = 0, nhandle = 0x8000; @@ -346,7 +347,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, struct route4_bucket *b; struct tcf_exts e; - err = tcf_exts_validate(tp, tb, est, &e, &route_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &route_ext_map); if (err < 0) return err; @@ -427,7 +428,7 @@ errout: return err; } -static int route4_change(struct sk_buff *in_skb, +static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -457,7 +458,7 @@ static int route4_change(struct sk_buff *in_skb, if (f->bkt) old_handle = f->handle; - err = route4_set_parms(tp, base, f, handle, head, tb, + err = route4_set_parms(net, tp, base, f, handle, head, tb, tca[TCA_RATE], 0); if (err < 0) return err; @@ -480,7 +481,7 @@ static int route4_change(struct sk_buff *in_skb, if (f == NULL) goto errout; - err = route4_set_parms(tp, base, f, handle, head, tb, + err = route4_set_parms(net, tp, base, f, handle, head, tb, tca[TCA_RATE], 1); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 494bbb9..252d8b0 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -416,7 +416,7 @@ static const struct 
nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; -static int rsvp_change(struct sk_buff *in_skb, +static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -440,7 +440,7 @@ static int rsvp_change(struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); if (err < 0) return err; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index a1293b4..b86535a 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -197,9 +197,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { }; static int -tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, - struct tcindex_data *p, struct tcindex_filter_result *r, - struct nlattr **tb, struct nlattr *est) +tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + u32 handle, struct tcindex_data *p, + struct tcindex_filter_result *r, struct nlattr **tb, + struct nlattr *est) { int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; @@ -208,7 +209,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_exts e; - err = tcf_exts_validate(tp, tb, est, &e, &tcindex_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &tcindex_ext_map); if (err < 0) return err; @@ -332,7 +333,7 @@ errout: } static int -tcindex_change(struct sk_buff *in_skb, +tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { @@ -353,7 +354,8 @@ tcindex_change(struct sk_buff *in_skb, if (err < 0) return err; - return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE]); + return 
tcindex_set_parms(net, tp, base, handle, p, r, tb, + tca[TCA_RATE]); } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c7c27bc..eb07a1e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -488,15 +488,15 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) }, }; -static int u32_set_parms(struct tcf_proto *tp, unsigned long base, - struct tc_u_hnode *ht, +static int u32_set_parms(struct net *net, struct tcf_proto *tp, + unsigned long base, struct tc_u_hnode *ht, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est) { int err; struct tcf_exts e; - err = tcf_exts_validate(tp, tb, est, &e, &u32_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &u32_ext_map); if (err < 0) return err; @@ -544,7 +544,7 @@ errout: return err; } -static int u32_change(struct sk_buff *in_skb, +static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) @@ -570,7 +570,8 @@ static int u32_change(struct sk_buff *in_skb, if (TC_U32_KEY(n->handle) == 0) return -EINVAL; - return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE]); + return u32_set_parms(net, tp, base, n->ht_up, n, tb, + tca[TCA_RATE]); } if (tb[TCA_U32_DIVISOR]) { @@ -656,7 +657,7 @@ static int u32_change(struct sk_buff *in_skb, } #endif - err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE]); + err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]); if (err == 0) { struct tc_u_knode **ins; for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d84f7e7..c297e2a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -493,7 +493,7 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_watchdog_init); -void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) +void 
qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) { if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state)) @@ -502,10 +502,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) qdisc_throttled(wd->qdisc); hrtimer_start(&wd->timer, - ns_to_ktime(PSCHED_TICKS2NS(expires)), + ns_to_ktime(expires), HRTIMER_MODE_ABS); } -EXPORT_SYMBOL(qdisc_watchdog_schedule); +EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { @@ -545,7 +545,7 @@ static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { struct Qdisc_class_common *cl; - struct hlist_node *n, *next; + struct hlist_node *next; struct hlist_head *nhash, *ohash; unsigned int nsize, nmask, osize; unsigned int i, h; @@ -564,7 +564,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) sch_tree_lock(sch); for (i = 0; i < osize; i++) { - hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) { + hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { h = qdisc_class_hash(cl->classid, nmask); hlist_add_head(&cl->hnode, &nhash[h]); } @@ -1768,7 +1768,7 @@ static int __net_init psched_net_init(struct net *net) { struct proc_dir_entry *e; - e = proc_net_fops_create(net, "psched", 0, &psched_fops); + e = proc_create("psched", 0, net->proc_net, &psched_fops); if (e == NULL) return -ENOMEM; @@ -1777,7 +1777,7 @@ static int __net_init psched_net_init(struct net *net) static void __net_exit psched_net_exit(struct net *net) { - proc_net_remove(net, "psched"); + remove_proc_entry("psched", net->proc_net); } #else static int __net_init psched_net_init(struct net *net) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 0e19948..1bc210f 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -962,8 +962,11 @@ cbq_dequeue(struct Qdisc *sch) cbq_update(q); if ((incr -= incr2) < 0) 
incr = 0; + q->now += incr; + } else { + if (now > q->now) + q->now = now; } - q->now += incr; q->now_rt = now; for (;;) { @@ -1041,14 +1044,13 @@ static void cbq_adjust_levels(struct cbq_class *this) static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) { struct cbq_class *cl; - struct hlist_node *n; unsigned int h; if (q->quanta[prio] == 0) return; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { /* BUGGGG... Beware! This expression suffer of * arithmetic overflows! */ @@ -1087,10 +1089,9 @@ static void cbq_sync_defmap(struct cbq_class *cl) continue; for (h = 0; h < q->clhash.hashsize; h++) { - struct hlist_node *n; struct cbq_class *c; - hlist_for_each_entry(c, n, &q->clhash.hash[h], + hlist_for_each_entry(c, &q->clhash.hash[h], common.hnode) { if (c->split == split && c->level < level && c->defmap & (1<<i)) { @@ -1210,7 +1211,6 @@ cbq_reset(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; - struct hlist_node *n; int prio; unsigned int h; @@ -1228,7 +1228,7 @@ cbq_reset(struct Qdisc *sch) q->active[prio] = NULL; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { qdisc_reset(cl->q); cl->next_alive = NULL; @@ -1697,7 +1697,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) static void cbq_destroy(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct cbq_class *cl; unsigned int h; @@ -1710,11 +1710,11 @@ static void cbq_destroy(struct Qdisc *sch) * be bound to classes which have been destroyed already. 
--TGR '04 */ for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) tcf_destroy_chain(&cl->filter_list); } for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], common.hnode) cbq_destroy_class(sch, cl); } @@ -2013,14 +2013,13 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; - struct hlist_node *n; unsigned int h; if (arg->stop) return; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 71e50c8..759b308 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -293,14 +293,13 @@ static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -451,11 +450,10 @@ static void drr_reset_qdisc(struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen) list_del(&cl->alist); qdisc_reset(cl->qdisc); @@ -468,13 +466,13 @@ static void drr_destroy_qdisc(struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - 
struct hlist_node *n, *next; + struct hlist_node *next; unsigned int i; tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) drr_destroy_class(sch, cl); } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4e606fc..5578628 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -195,7 +195,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow->deficit = q->quantum; flow->dropped = 0; } - if (++sch->q.qlen < sch->limit) + if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; q->drop_overlimit++; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5d81a44..eac7e0e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -25,6 +25,7 @@ #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> @@ -896,3 +897,39 @@ void dev_shutdown(struct net_device *dev) WARN_ON(timer_pending(&dev->watchdog_timer)); } + +void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) +{ + u64 factor; + u64 mult; + int shift; + + r->rate_bps = (u64)rate << 3; + r->shift = 0; + r->mult = 1; + /* + * Calibrate mult, shift so that token counting is accurate + * for smallest packet size (64 bytes). Token (time in ns) is + * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will + * work as long as the smallest packet transfer time can be + * accurately represented in nanosec. + */ + if (r->rate_bps > 0) { + /* + * Higher shift gives better accuracy. Find the largest + * shift such that mult fits in 32 bits. 
+ */ + for (shift = 0; shift < 16; shift++) { + r->shift = shift; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + mult = div64_u64(factor, r->rate_bps); + if (mult > UINT_MAX) + break; + } + + r->shift = shift - 1; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + r->mult = div64_u64(factor, r->rate_bps); + } +} +EXPORT_SYMBOL(psched_ratecfg_precompute); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6c2ec45..9facea0 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1389,7 +1389,6 @@ static void hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct hfsc_sched *q = qdisc_priv(sch); - struct hlist_node *n; struct hfsc_class *cl; unsigned int i; @@ -1397,7 +1396,7 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { if (arg->count < arg->skip) { arg->count++; @@ -1523,11 +1522,10 @@ hfsc_reset_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } q->eligible = RB_ROOT; @@ -1540,16 +1538,16 @@ static void hfsc_destroy_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct hfsc_class *cl; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) tcf_destroy_chain(&cl->filter_list); } for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], cl_common.hnode) 
hfsc_destroy_class(sch, cl); } @@ -1564,12 +1562,11 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; struct hfsc_class *cl; - struct hlist_node *n; unsigned int i; sch->qstats.backlog = 0; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) sch->qstats.backlog += cl->qdisc->qstats.backlog; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d2922c0..571f1d2 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -38,6 +38,7 @@ #include <linux/workqueue.h> #include <linux/slab.h> #include <net/netlink.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> /* HTB algorithm. @@ -71,12 +72,6 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; -struct htb_rate_cfg { - u64 rate_bps; - u32 mult; - u32 shift; -}; - /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { struct Qdisc_class_common common; @@ -124,8 +119,8 @@ struct htb_class { int filter_cnt; /* token bucket parameters */ - struct htb_rate_cfg rate; - struct htb_rate_cfg ceil; + struct psched_ratecfg rate; + struct psched_ratecfg ceil; s64 buffer, cbuffer; /* token bucket depth/rate */ psched_tdiff_t mbuffer; /* max wait time */ s64 tokens, ctokens; /* current number of tokens */ @@ -168,45 +163,6 @@ struct htb_sched { struct work_struct work; }; -static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len) -{ - return ((u64)len * r->mult) >> r->shift; -} - -static void htb_precompute_ratedata(struct htb_rate_cfg *r) -{ - u64 factor; - u64 mult; - int shift; - - r->shift = 0; - r->mult = 1; - /* - * Calibrate mult, shift so that token counting is accurate - * for smallest packet size (64 bytes). Token (time in ns) is - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. 
It will - * work as long as the smallest packet transfer time can be - * accurately represented in nanosec. - */ - if (r->rate_bps > 0) { - /* - * Higher shift gives better accuracy. Find the largest - * shift such that mult fits in 32 bits. - */ - for (shift = 0; shift < 16; shift++) { - r->shift = shift; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - mult = div64_u64(factor, r->rate_bps); - if (mult > UINT_MAX) - break; - } - - r->shift = shift - 1; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - r->mult = div64_u64(factor, r->rate_bps); - } -} - /* find class in global hash table using given handle */ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { @@ -632,7 +588,7 @@ static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff) if (toks > cl->buffer) toks = cl->buffer; - toks -= (s64) l2t_ns(&cl->rate, bytes); + toks -= (s64) psched_l2t_ns(&cl->rate, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -645,7 +601,7 @@ static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff) if (toks > cl->cbuffer) toks = cl->cbuffer; - toks -= (s64) l2t_ns(&cl->ceil, bytes); + toks -= (s64) psched_l2t_ns(&cl->ceil, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -919,7 +875,7 @@ ok: q->now = ktime_to_ns(ktime_get()); start_at = jiffies; - next_event = q->now + 5 * NSEC_PER_SEC; + next_event = q->now + 5LLU * NSEC_PER_SEC; for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ @@ -993,11 +949,10 @@ static void htb_reset(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->level) memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { @@ -1134,10 +1089,10 @@ static int 
htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); - opt.rate.rate = cl->rate.rate_bps >> 3; - opt.buffer = cl->buffer; - opt.ceil.rate = cl->ceil.rate_bps >> 3; - opt.cbuffer = cl->cbuffer; + opt.rate.rate = psched_ratecfg_getrate(&cl->rate); + opt.buffer = PSCHED_NS2TICKS(cl->buffer); + opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil); + opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); opt.quantum = cl->quantum; opt.prio = cl->prio; opt.level = cl->level; @@ -1262,7 +1217,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct htb_class *cl; unsigned int i; @@ -1276,11 +1231,11 @@ static void htb_destroy(struct Qdisc *sch) tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) tcf_destroy_chain(&cl->filter_list); } for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) htb_destroy_class(sch, cl); } @@ -1459,8 +1414,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ - cl->tokens = hopt->buffer; - cl->ctokens = hopt->cbuffer; + cl->tokens = PSCHED_TICKS2NS(hopt->buffer); + cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */ cl->t_c = psched_get_time(); cl->cmode = HTB_CAN_SEND; @@ -1503,17 +1458,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - cl->buffer = hopt->buffer; - cl->cbuffer = hopt->cbuffer; - - cl->rate.rate_bps = (u64)hopt->rate.rate << 3; - cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3; - - 
htb_precompute_ratedata(&cl->rate); - htb_precompute_ratedata(&cl->ceil); + psched_ratecfg_precompute(&cl->rate, hopt->rate.rate); + psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate); - cl->buffer = hopt->buffer << PSCHED_SHIFT; - cl->cbuffer = hopt->buffer << PSCHED_SHIFT; + cl->buffer = PSCHED_TICKS2NS(hopt->buffer); + cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); sch_tree_unlock(sch); @@ -1566,14 +1515,13 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 298c0dd..3d2acc7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -438,18 +438,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->rate) { struct sk_buff_head *list = &sch->q; - delay += packet_len_2_sched_time(skb->len, q); - if (!skb_queue_empty(list)) { /* - * Last packet in queue is reference point (now). - * First packet in queue is already in flight, - * calculate this time bonus and substract + * Last packet in queue is reference point (now), + * calculate this time bonus and subtract * from delay. 
*/ - delay -= now - netem_skb_cb(skb_peek(list))->time_to_send; + delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now; + delay = max_t(psched_tdiff_t, 0, delay); now = netem_skb_cb(skb_peek_tail(list))->time_to_send; } + + delay += packet_len_2_sched_time(skb->len, q); } cb->time_to_send = now + delay; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6ed3765..d51852b 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -276,9 +276,8 @@ static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q, u32 lmax, u32 weight) { struct qfq_aggregate *agg; - struct hlist_node *n; - hlist_for_each_entry(agg, n, &q->nonfull_aggs, nonfull_next) + hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next) if (agg->lmax == lmax && agg->class_weight == weight) return agg; @@ -299,6 +298,10 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, new_num_classes == q->max_agg_classes - 1) /* agg no more full */ hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs); + /* The next assignment may let + * agg->initial_budget > agg->budgetmax + * hold, we will take it into account in charge_actual_service(). 
+ */ agg->budgetmax = new_num_classes * agg->lmax; new_agg_weight = agg->class_weight * new_num_classes; agg->inv_w = ONE_FP/new_agg_weight; @@ -670,14 +673,13 @@ static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -819,7 +821,7 @@ static void qfq_make_eligible(struct qfq_sched *q) unsigned long old_vslot = q->oldV >> q->min_slot_shift; if (vslot != old_vslot) { - unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1; + unsigned long mask = (1ULL << fls(vslot ^ old_vslot)) - 1; qfq_move_groups(q, mask, IR, ER); qfq_move_groups(q, mask, IB, EB); } @@ -990,12 +992,23 @@ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, /* Update F according to the actual service received by the aggregate. 
*/ static inline void charge_actual_service(struct qfq_aggregate *agg) { - /* compute the service received by the aggregate */ - u32 service_received = agg->initial_budget - agg->budget; + /* Compute the service received by the aggregate, taking into + * account that, after decreasing the number of classes in + * agg, it may happen that + * agg->initial_budget - agg->budget > agg->bugdetmax + */ + u32 service_received = min(agg->budgetmax, + agg->initial_budget - agg->budget); agg->F = agg->S + (u64)service_received * agg->inv_w; } +static inline void qfq_update_agg_ts(struct qfq_sched *q, + struct qfq_aggregate *agg, + enum update_reason reason); + +static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg); + static struct sk_buff *qfq_dequeue(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); @@ -1023,7 +1036,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) in_serv_agg->initial_budget = in_serv_agg->budget = in_serv_agg->budgetmax; - if (!list_empty(&in_serv_agg->active)) + if (!list_empty(&in_serv_agg->active)) { /* * Still active: reschedule for * service. Possible optimization: if no other @@ -1034,8 +1047,9 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) * handle it, we would need to maintain an * extra num_active_aggs field. */ - qfq_activate_agg(q, in_serv_agg, requeue); - else if (sch->q.qlen == 0) { /* no aggregate to serve */ + qfq_update_agg_ts(q, in_serv_agg, requeue); + qfq_schedule_agg(q, in_serv_agg); + } else if (sch->q.qlen == 0) { /* no aggregate to serve */ q->in_serv_agg = NULL; return NULL; } @@ -1054,7 +1068,15 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) qdisc_bstats_update(sch, skb); agg_dequeue(in_serv_agg, cl, len); - in_serv_agg->budget -= len; + /* If lmax is lowered, through qfq_change_class, for a class + * owning pending packets with larger size than the new value + * of lmax, then the following condition may hold. 
+ */ + if (unlikely(in_serv_agg->budget < len)) + in_serv_agg->budget = 0; + else + in_serv_agg->budget -= len; + q->V += (u64)len * IWSUM; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, @@ -1219,17 +1241,11 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->deficit = agg->lmax; list_add_tail(&cl->alist, &agg->active); - if (list_first_entry(&agg->active, struct qfq_class, alist) != cl) - return err; /* aggregate was not empty, nothing else to do */ + if (list_first_entry(&agg->active, struct qfq_class, alist) != cl || + q->in_serv_agg == agg) + return err; /* non-empty or in service, nothing else to do */ - /* recharge budget */ - agg->initial_budget = agg->budget = agg->budgetmax; - - qfq_update_agg_ts(q, agg, enqueue); - if (q->in_serv_agg == NULL) - q->in_serv_agg = agg; - else if (agg != q->in_serv_agg) - qfq_schedule_agg(q, agg); + qfq_activate_agg(q, agg, enqueue); return err; } @@ -1263,7 +1279,8 @@ static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg) /* group was surely ineligible, remove */ __clear_bit(grp->index, &q->bitmaps[IR]); __clear_bit(grp->index, &q->bitmaps[IB]); - } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V)) + } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) && + q->in_serv_agg == NULL) q->V = roundedS; grp->S = roundedS; @@ -1286,8 +1303,15 @@ skip_update: static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg, enum update_reason reason) { + agg->initial_budget = agg->budget = agg->budgetmax; /* recharge budg. */ + qfq_update_agg_ts(q, agg, reason); - qfq_schedule_agg(q, agg); + if (q->in_serv_agg == NULL) { /* no aggr. 
in service or scheduled */ + q->in_serv_agg = agg; /* start serving this aggregate */ + /* update V: to be in service, agg must be eligible */ + q->oldV = q->V = agg->S; + } else if (agg != q->in_serv_agg) + qfq_schedule_agg(q, agg); } static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, @@ -1359,8 +1383,6 @@ static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg) __set_bit(grp->index, &q->bitmaps[s]); } } - - qfq_update_eligible(q); } static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) @@ -1376,11 +1398,10 @@ static unsigned int qfq_drop_from_slot(struct qfq_sched *q, struct hlist_head *slot) { struct qfq_aggregate *agg; - struct hlist_node *n; struct qfq_class *cl; unsigned int len; - hlist_for_each_entry(agg, n, slot, next) { + hlist_for_each_entry(agg, slot, next) { list_for_each_entry(cl, &agg->active, alist) { if (!cl->qdisc->ops->drop) @@ -1459,11 +1480,10 @@ static void qfq_reset_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen > 0) qfq_deactivate_class(q, cl); @@ -1477,13 +1497,13 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int i; tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) { qfq_destroy_class(sch, cl); } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 4b056c15..c8388f3 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -19,6 +19,7 @@ #include <linux/errno.h> #include <linux/skbuff.h> #include <net/netlink.h> 
+#include <net/sch_generic.h> #include <net/pkt_sched.h> @@ -100,23 +101,21 @@ struct tbf_sched_data { /* Parameters */ u32 limit; /* Maximal length of backlog: bytes */ - u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ - u32 mtu; + s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ + s64 mtu; u32 max_size; - struct qdisc_rate_table *R_tab; - struct qdisc_rate_table *P_tab; + struct psched_ratecfg rate; + struct psched_ratecfg peak; + bool peak_present; /* Variables */ - long tokens; /* Current number of B tokens */ - long ptokens; /* Current number of P tokens */ - psched_time_t t_c; /* Time check-point */ + s64 tokens; /* Current number of B tokens */ + s64 ptokens; /* Current number of P tokens */ + s64 t_c; /* Time check-point */ struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ struct qdisc_watchdog watchdog; /* Watchdog timer */ }; -#define L2T(q, L) qdisc_l2t((q)->R_tab, L) -#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L) - static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -156,24 +155,24 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) skb = q->qdisc->ops->peek(q->qdisc); if (skb) { - psched_time_t now; - long toks; - long ptoks = 0; + s64 now; + s64 toks; + s64 ptoks = 0; unsigned int len = qdisc_pkt_len(skb); - now = psched_get_time(); - toks = psched_tdiff_bounded(now, q->t_c, q->buffer); + now = ktime_to_ns(ktime_get()); + toks = min_t(s64, now - q->t_c, q->buffer); - if (q->P_tab) { + if (q->peak_present) { ptoks = toks + q->ptokens; - if (ptoks > (long)q->mtu) + if (ptoks > q->mtu) ptoks = q->mtu; - ptoks -= L2T_P(q, len); + ptoks -= (s64) psched_l2t_ns(&q->peak, len); } toks += q->tokens; - if (toks > (long)q->buffer) + if (toks > q->buffer) toks = q->buffer; - toks -= L2T(q, len); + toks -= (s64) psched_l2t_ns(&q->rate, len); if ((toks|ptoks) >= 0) { skb = qdisc_dequeue_peeked(q->qdisc); @@ -189,8 +188,8 @@ static struct sk_buff *tbf_dequeue(struct 
Qdisc *sch) return skb; } - qdisc_watchdog_schedule(&q->watchdog, - now + max_t(long, -toks, -ptoks)); + qdisc_watchdog_schedule_ns(&q->watchdog, + now + max_t(long, -toks, -ptoks)); /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, @@ -214,7 +213,7 @@ static void tbf_reset(struct Qdisc *sch) qdisc_reset(q->qdisc); sch->q.qlen = 0; - q->t_c = psched_get_time(); + q->t_c = ktime_to_ns(ktime_get()); q->tokens = q->buffer; q->ptokens = q->mtu; qdisc_watchdog_cancel(&q->watchdog); @@ -293,14 +292,19 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc = child; } q->limit = qopt->limit; - q->mtu = qopt->mtu; + q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; - q->buffer = qopt->buffer; + q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; - swap(q->R_tab, rtab); - swap(q->P_tab, ptab); + psched_ratecfg_precompute(&q->rate, rtab->rate.rate); + if (ptab) { + psched_ratecfg_precompute(&q->peak, ptab->rate.rate); + q->peak_present = true; + } else { + q->peak_present = false; + } sch_tree_unlock(sch); err = 0; @@ -319,7 +323,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - q->t_c = psched_get_time(); + q->t_c = ktime_to_ns(ktime_get()); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; @@ -331,12 +335,6 @@ static void tbf_destroy(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); - - if (q->P_tab) - qdisc_put_rtab(q->P_tab); - if (q->R_tab) - qdisc_put_rtab(q->R_tab); - qdisc_destroy(q->qdisc); } @@ -352,13 +350,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; opt.limit = q->limit; - opt.rate = q->R_tab->rate; - if (q->P_tab) - opt.peakrate = q->P_tab->rate; + opt.rate.rate = psched_ratecfg_getrate(&q->rate); + if (q->peak_present) + opt.peakrate.rate = psched_ratecfg_getrate(&q->peak); else memset(&opt.peakrate, 0, 
sizeof(opt.peakrate)); - opt.mtu = q->mtu; - opt.buffer = q->buffer; + opt.mtu = PSCHED_NS2TICKS(q->mtu); + opt.buffer = PSCHED_NS2TICKS(q->buffer); if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index c262106..cf48528 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -3,8 +3,8 @@ # menuconfig IP_SCTP - tristate "The SCTP Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "The SCTP Protocol" + depends on INET depends on IPV6 || IPV6=n select CRYPTO select CRYPTO_HMAC @@ -68,7 +68,7 @@ config SCTP_DBG_OBJCNT If unsure, say N choice prompt "Default SCTP cookie HMAC encoding" - default SCTP_COOKIE_HMAC_MD5 + default SCTP_DEFAULT_COOKIE_HMAC_MD5 help This option sets the default sctp cookie hmac algorithm when in doubt select 'md5' diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b45ed1f..d2709e2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -434,8 +434,7 @@ void sctp_association_free(struct sctp_association *asoc) * on our state. */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) { - if (timer_pending(&asoc->timers[i]) && - del_timer(&asoc->timers[i])) + if (del_timer(&asoc->timers[i])) sctp_association_put(asoc); } @@ -1080,7 +1079,7 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc, transports) { if (transport == active) - break; + continue; list_for_each_entry(chunk, &transport->transmitted, transmitted_list) { if (key == chunk->subh.data_hdr->tsn) { @@ -1497,7 +1496,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) /* Stop the SACK timer. 
*/ timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); } } @@ -1592,32 +1591,31 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, /* Set an association id for a given association */ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) { - int assoc_id; - int error = 0; + bool preload = gfp & __GFP_WAIT; + int ret; /* If the id is already assigned, keep it. */ if (asoc->assoc_id) - return error; -retry: - if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) - return -ENOMEM; + return 0; + if (preload) + idr_preload(gfp); spin_lock_bh(&sctp_assocs_id_lock); - error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, - idr_low, &assoc_id); - if (!error) { - idr_low = assoc_id + 1; + /* 0 is not a valid id, idr_low is always >= 1 */ + ret = idr_alloc(&sctp_assocs_id, asoc, idr_low, 0, GFP_NOWAIT); + if (ret >= 0) { + idr_low = ret + 1; if (idr_low == INT_MAX) idr_low = 1; } spin_unlock_bh(&sctp_assocs_id_lock); - if (error == -EAGAIN) - goto retry; - else if (error) - return error; + if (preload) + idr_preload_end(); + if (ret < 0) + return ret; - asoc->assoc_id = (sctp_assoc_t) assoc_id; - return error; + asoc->assoc_id = (sctp_assoc_t)ret; + return 0; } /* Free the ASCONF queue */ diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 159b9bc..ba1dfc3 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -71,7 +71,7 @@ void sctp_auth_key_put(struct sctp_auth_bytes *key) return; if (atomic_dec_and_test(&key->refcnt)) { - kfree(key); + kzfree(key); SCTP_DBG_OBJCNT_DEC(keys); } } @@ -200,27 +200,28 @@ static struct sctp_auth_bytes *sctp_auth_make_key_vector( struct sctp_auth_bytes *new; __u32 len; __u32 offset = 0; + __u16 random_len, hmacs_len, chunks_len = 0; - len = ntohs(random->param_hdr.length) + ntohs(hmacs->param_hdr.length); - if (chunks) - len += ntohs(chunks->param_hdr.length); + random_len = ntohs(random->param_hdr.length); + hmacs_len = 
ntohs(hmacs->param_hdr.length); + if (chunks) + chunks_len = ntohs(chunks->param_hdr.length); - new = kmalloc(sizeof(struct sctp_auth_bytes) + len, gfp); + len = random_len + hmacs_len + chunks_len; + + new = sctp_auth_create_key(len, gfp); if (!new) return NULL; - new->len = len; - - memcpy(new->data, random, ntohs(random->param_hdr.length)); - offset += ntohs(random->param_hdr.length); + memcpy(new->data, random, random_len); + offset += random_len; if (chunks) { - memcpy(new->data + offset, chunks, - ntohs(chunks->param_hdr.length)); - offset += ntohs(chunks->param_hdr.length); + memcpy(new->data + offset, chunks, chunks_len); + offset += chunks_len; } - memcpy(new->data + offset, hmacs, ntohs(hmacs->param_hdr.length)); + memcpy(new->data + offset, hmacs, hmacs_len); return new; } @@ -350,8 +351,8 @@ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret( secret = sctp_auth_asoc_set_secret(ep_key, first_vector, last_vector, gfp); out: - kfree(local_key_vector); - kfree(peer_key_vector); + sctp_auth_key_put(local_key_vector); + sctp_auth_key_put(peer_key_vector); return secret; } diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 17a001b..12ed45d 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -151,13 +151,11 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Initialize the secret key used with cookie. 
*/ - get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE); - ep->last_key = ep->current_key = 0; - ep->key_changed_at = jiffies; + get_random_bytes(ep->secret_key, sizeof(ep->secret_key)); /* SCTP-AUTH extensions*/ INIT_LIST_HEAD(&ep->endpoint_shared_keys); - null_key = sctp_auth_shkey_create(0, GFP_KERNEL); + null_key = sctp_auth_shkey_create(0, gfp); if (!null_key) goto nomem; @@ -271,6 +269,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); + memset(ep->secret_key, 0, sizeof(ep->secret_key)); + /* Remove and free the port */ if (sctp_sk(ep->base.sk)->bind_hash) sctp_put_port(ep->base.sk); @@ -332,7 +332,6 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( struct sctp_transport *t = NULL; struct sctp_hashbucket *head; struct sctp_ep_common *epb; - struct hlist_node *node; int hash; int rport; @@ -350,7 +349,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( rport); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { tmp = sctp_assoc(epb); if (tmp->ep != ep || rport != tmp->peer.port) continue; diff --git a/net/sctp/input.c b/net/sctp/input.c index 8bd3c27..4b2c831 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -468,8 +468,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } else { struct net *net = sock_net(sk); - if (timer_pending(&t->proto_unreach_timer) && - del_timer(&t->proto_unreach_timer)) + if (del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); sctp_do_sm(net, SCTP_EVENT_T_OTHER, @@ -785,13 +784,12 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_endpoint *ep; - struct hlist_node *node; int hash; hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); - 
sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); if (sctp_endpoint_is_match(ep, net, laddr)) goto hit; @@ -877,7 +875,6 @@ static struct sctp_association *__sctp_lookup_association( struct sctp_ep_common *epb; struct sctp_association *asoc; struct sctp_transport *transport; - struct hlist_node *node; int hash; /* Optimize here for direct hit, only listening connections can @@ -887,7 +884,7 @@ static struct sctp_association *__sctp_lookup_association( ntohs(peer->v4.sin_port)); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { asoc = sctp_assoc(epb); transport = sctp_assoc_is_match(asoc, net, local, peer); if (transport) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f3f0f4d..391a245 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -326,9 +326,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, */ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid && laddr->state != SCTP_ADDR_SRC) + if (!laddr->valid) continue; - if ((laddr->a.sa.sa_family == AF_INET6) && + if ((laddr->state == SCTP_ADDR_SRC) && + (laddr->a.sa.sa_family == AF_INET6) && (scope <= sctp_scope(&laddr->a))) { bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); if (!baddr || (matchlen < bmatchlen)) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 379c81d..01dca75 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -224,7 +224,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) /* Free the outqueue structure and any related pending chunks. 
*/ -void sctp_outq_teardown(struct sctp_outq *q) +static void __sctp_outq_teardown(struct sctp_outq *q) { struct sctp_transport *transport; struct list_head *lchunk, *temp; @@ -277,8 +277,6 @@ void sctp_outq_teardown(struct sctp_outq *q) sctp_chunk_free(chunk); } - q->error = 0; - /* Throw away any leftover control chunks. */ list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) { list_del_init(&chunk->list); @@ -286,11 +284,17 @@ void sctp_outq_teardown(struct sctp_outq *q) } } +void sctp_outq_teardown(struct sctp_outq *q) +{ + __sctp_outq_teardown(q); + sctp_outq_init(q->asoc, q); +} + /* Free the outqueue structure and any related pending chunks. */ void sctp_outq_free(struct sctp_outq *q) { /* Throw away leftover chunks. */ - sctp_outq_teardown(q); + __sctp_outq_teardown(q); /* If we were kmalloc()'d, free the memory. */ if (q->malloced) @@ -1696,10 +1700,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, * address. */ if (!transport->flight_size) { - if (timer_pending(&transport->T3_rtx_timer) && - del_timer(&transport->T3_rtx_timer)) { + if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); - } } else if (restart_timer) { if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) diff --git a/net/sctp/probe.c b/net/sctp/probe.c index 5f7518d..ad0dba8 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -122,12 +122,12 @@ static const struct file_operations sctpprobe_fops = { .llseek = noop_llseek, }; -sctp_disposition_t jsctp_sf_eat_sack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static sctp_disposition_t jsctp_sf_eat_sack(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) { struct sctp_transport *sp; static __u32 lcwnd = 0; @@ -183,13 +183,20 @@ static __init int sctpprobe_init(void) 
{ int ret = -ENOMEM; + /* Warning: if the function signature of sctp_sf_eat_sack_6_2, + * has been changed, you also have to change the signature of + * jsctp_sf_eat_sack, otherwise you end up right here! + */ + BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2, + jsctp_sf_eat_sack) == 0); + init_waitqueue_head(&sctpw.wait); spin_lock_init(&sctpw.lock); if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) return ret; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, - &sctpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, + &sctpprobe_fops)) goto free_kfifo; ret = register_jprobe(&sctp_recv_probe); @@ -201,7 +208,7 @@ static __init int sctpprobe_init(void) return 0; remove_proc: - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); free_kfifo: kfifo_free(&sctpw.fifo); return ret; @@ -210,7 +217,7 @@ free_kfifo: static __exit void sctpprobe_exit(void) { kfifo_free(&sctpw.fifo); - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&sctp_recv_probe); } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 8c19e97..ab3bba8 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -213,7 +213,6 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_endpoint *ep; struct sock *sk; - struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= sctp_ep_hashsize) @@ -222,7 +221,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) head = &sctp_ep_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) @@ -321,7 +320,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_association *assoc; struct sock *sk; - struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= 
sctp_assoc_hashsize) @@ -330,7 +328,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) head = &sctp_assoc_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) @@ -436,7 +434,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; - struct hlist_node *node; struct sctp_transport *tsp; int hash = *(loff_t *)v; @@ -447,7 +444,7 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) sctp_local_bh_disable(); read_lock(&head->lock); rcu_read_lock(); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { if (!net_eq(sock_net(epb->sk), seq_file_net(seq))) continue; assoc = sctp_assoc(epb); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f898b1c..1c2e46c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -595,7 +595,7 @@ static void sctp_v4_ecn_capable(struct sock *sk) INET_ECN_xmit(sk); } -void sctp_addr_wq_timeout_handler(unsigned long arg) +static void sctp_addr_wq_timeout_handler(unsigned long arg) { struct net *net = (struct net *)arg; struct sctp_sockaddr_entry *addrw, *temp; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e1c5fc2..cf579e7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1201,7 +1201,7 @@ nodata: * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) * This is a helper function to allocate an error chunk for * for those invalid parameter codes in which we may not want - * to report all the errors, if the incomming chunk is large + * to report all the errors, if the incoming chunk is large */ static inline struct sctp_chunk *sctp_make_op_error_fixed( const struct sctp_association *asoc, @@ -1589,8 +1589,6 @@ static 
sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, struct sctp_signed_cookie *cookie; struct scatterlist sg; int headersize, bodysize; - unsigned int keylen; - char *key; /* Header size is static data prior to the actual cookie, including * any padding. @@ -1650,12 +1648,11 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Sign the message. */ sg_init_one(&sg, &cookie->c, bodysize); - keylen = SCTP_SECRET_SIZE; - key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; desc.flags = 0; - if (crypto_hash_setkey(desc.tfm, key, keylen) || + if (crypto_hash_setkey(desc.tfm, ep->secret_key, + sizeof(ep->secret_key)) || crypto_hash_digest(&desc, &sg, bodysize, cookie->signature)) goto free_cookie; } @@ -1682,8 +1679,7 @@ struct sctp_association *sctp_unpack_cookie( int headersize, bodysize, fixed_size; __u8 *digest = ep->digest; struct scatterlist sg; - unsigned int keylen, len; - char *key; + unsigned int len; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; struct timeval tv; @@ -1718,34 +1714,21 @@ struct sctp_association *sctp_unpack_cookie( goto no_hmac; /* Check the signature. */ - keylen = SCTP_SECRET_SIZE; sg_init_one(&sg, bear_cookie, bodysize); - key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; desc.flags = 0; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - if (crypto_hash_setkey(desc.tfm, key, keylen) || + if (crypto_hash_setkey(desc.tfm, ep->secret_key, + sizeof(ep->secret_key)) || crypto_hash_digest(&desc, &sg, bodysize, digest)) { *error = -SCTP_IERROR_NOMEM; goto fail; } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { - /* Try the previous key. 
*/ - key = (char *)ep->secret_key[ep->last_key]; - memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - if (crypto_hash_setkey(desc.tfm, key, keylen) || - crypto_hash_digest(&desc, &sg, bodysize, digest)) { - *error = -SCTP_IERROR_NOMEM; - goto fail; - } - - if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { - /* Yikes! Still bad signature! */ - *error = -SCTP_IERROR_BAD_SIG; - goto fail; - } + *error = -SCTP_IERROR_BAD_SIG; + goto fail; } no_hmac: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c957775..8aab894 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -674,10 +674,8 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds, list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - if (timer_pending(&t->T3_rtx_timer) && - del_timer(&t->T3_rtx_timer)) { + if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); - } } } @@ -1517,7 +1515,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_TIMER_STOP: timer = &asoc->timers[cmd->obj.to]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 618ec7e..de1a013 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1779,8 +1779,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, /* Update the content of current association. */ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, + SCTP_STATE(SCTP_STATE_ESTABLISHED)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); return SCTP_DISPOSITION_CONSUME; nomem_ev: @@ -2080,7 +2082,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, } /* Delete the tempory new association. 
*/ - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); /* Restore association pointer to provide SCTP command interpeter diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9e65758..b907073 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3390,7 +3390,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk, ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey); out: - kfree(authkey); + kzfree(authkey); return ret; } @@ -5653,6 +5653,9 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, if (len < sizeof(sctp_assoc_t)) return -EINVAL; + /* Allow the struct to grow and fill in as much as possible */ + len = min_t(size_t, len, sizeof(sas)); + if (copy_from_user(&sas, optval, len)) return -EFAULT; @@ -5686,9 +5689,6 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, /* Mark beginning of a new observation period */ asoc->stats.max_obs_rto = asoc->rto_min; - /* Allow the struct to grow and fill in as much as possible */ - len = min_t(size_t, len, sizeof(sas)); - if (put_user(len, optlen)) return -EFAULT; @@ -5882,8 +5882,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { struct sctp_bind_hashbucket *head; /* hash list */ - struct sctp_bind_bucket *pp; /* hash list port iterator */ - struct hlist_node *node; + struct sctp_bind_bucket *pp; unsigned short snum; int ret; @@ -5910,7 +5909,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); - sctp_for_each_hentry(pp, node, &head->chain) + sctp_for_each_hentry(pp, &head->chain) if ((pp->port == rover) && net_eq(sock_net(sk), pp->net)) goto next; @@ -5938,7 +5937,7 @@ static long sctp_get_port_local(struct sock *sk, union 
sctp_addr *addr) */ head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; sctp_spin_lock(&head->lock); - sctp_for_each_hentry(pp, node, &head->chain) { + sctp_for_each_hentry(pp, &head->chain) { if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) goto pp_found; } @@ -5970,7 +5969,7 @@ pp_found: * that this port/socket (sk) combination are already * in an endpoint. */ - sk_for_each_bound(sk2, node, &pp->owner) { + sk_for_each_bound(sk2, &pp->owner) { struct sctp_endpoint *ep2; ep2 = sctp_sk(sk2)->ep; diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 442ad4e..825ea94 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -41,8 +41,6 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -#define MAX_KMALLOC_SIZE 131072 - static struct sctp_ssnmap *sctp_ssnmap_init(struct sctp_ssnmap *map, __u16 in, __u16 out); @@ -65,7 +63,7 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, int size; size = sctp_ssnmap_size(in, out); - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) retval = kmalloc(size, gfp); else retval = (struct sctp_ssnmap *) @@ -82,7 +80,7 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, return retval; fail_map: - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) kfree(retval); else free_pages((unsigned long)retval, get_order(size)); @@ -124,7 +122,7 @@ void sctp_ssnmap_free(struct sctp_ssnmap *map) int size; size = sctp_ssnmap_size(map->in.len, map->out.len); - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) kfree(map); else free_pages((unsigned long)map, get_order(size)); diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 043889a..bf3c6e8 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -366,7 +366,11 @@ int sctp_sysctl_net_register(struct net *net) void sctp_sysctl_net_unregister(struct net *net) { + struct ctl_table *table; + + table = net->sctp.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->sctp.sysctl_header); + kfree(table); } static struct 
ctl_table_header * sctp_sysctl_header; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4e45bb6..fafd2a4 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -151,13 +151,11 @@ void sctp_transport_free(struct sctp_transport *transport) * structure hang around in memory since we know * the tranport is going away. */ - if (timer_pending(&transport->T3_rtx_timer) && - del_timer(&transport->T3_rtx_timer)) + if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); /* Delete the ICMP proto unreachable timer if it's active. */ - if (timer_pending(&transport->proto_unreach_timer) && - del_timer(&transport->proto_unreach_timer)) + if (del_timer(&transport->proto_unreach_timer)) sctp_association_put(transport->asoc); sctp_transport_put(transport); @@ -168,10 +166,6 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head) struct sctp_transport *transport; transport = container_of(head, struct sctp_transport, rcu); - if (transport->asoc) - sctp_association_put(transport->asoc); - - sctp_packet_free(&transport->packet); dst_release(transport->dst); kfree(transport); @@ -186,6 +180,11 @@ static void sctp_transport_destroy(struct sctp_transport *transport) SCTP_ASSERT(transport->dead, "Transport is not dead", return); call_rcu(&transport->rcu, sctp_transport_destroy_rcu); + + sctp_packet_free(&transport->packet); + + if (transport->asoc) + sctp_association_put(transport->asoc); } /* Start T3_rtx timer if it is not already running and update the heartbeat @@ -654,10 +653,9 @@ void sctp_transport_reset(struct sctp_transport *t) void sctp_transport_immediate_rtx(struct sctp_transport *t) { /* Stop pending T3_rtx_timer */ - if (timer_pending(&t->T3_rtx_timer)) { - (void)del_timer(&t->T3_rtx_timer); + if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); - } + sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX); if (!timer_pending(&t->T3_rtx_timer)) { if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto)) diff --git 
a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 5f25e0c..396c451 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -51,7 +51,7 @@ static void sctp_tsnmap_update(struct sctp_tsnmap *map); static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, __u16 len, __u16 *start, __u16 *end); -static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap); +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 size); /* Initialize a block of memory as a tsnmap. */ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, @@ -124,7 +124,7 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, gap = tsn - map->base_tsn; - if (gap >= map->len && !sctp_tsnmap_grow(map, gap)) + if (gap >= map->len && !sctp_tsnmap_grow(map, gap + 1)) return -ENOMEM; if (!sctp_tsnmap_has_gap(map) && gap == 0) { @@ -360,23 +360,24 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, return ngaps; } -static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap) +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 size) { unsigned long *new; unsigned long inc; u16 len; - if (gap >= SCTP_TSN_MAP_SIZE) + if (size > SCTP_TSN_MAP_SIZE) return 0; - inc = ALIGN((gap - map->len),BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; + inc = ALIGN((size - map->len), BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; len = min_t(u16, map->len + inc, SCTP_TSN_MAP_SIZE); new = kzalloc(len>>3, GFP_ATOMIC); if (!new) return 0; - bitmap_copy(new, map->tsn_map, map->max_tsn_seen - map->base_tsn); + bitmap_copy(new, map->tsn_map, + map->max_tsn_seen - map->cumulative_tsn_ack_point); kfree(map->tsn_map); map->tsn_map = new; map->len = len; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ada1746..0fd5b3d 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -106,6 +106,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, { struct sk_buff_head temp; struct sctp_ulpevent *event; + int event_eor = 0; /* Create an event from the incoming chunk. 
*/ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); @@ -127,10 +128,12 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ - if (event) + if (event) { + event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0; sctp_ulpq_tail_event(ulpq, event); + } - return 0; + return event_eor; } /* Add a new event for propagation to the ULP. */ @@ -540,14 +543,19 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + if (!first_frag) + return NULL; + goto done; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; - } else if (next_tsn == ctsn) + } else if (next_tsn == ctsn) { next_tsn++; - else + last_frag = pos; + } else goto done; break; case SCTP_DATA_LAST_FRAG: @@ -651,6 +659,14 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) } else goto done; break; + + case SCTP_DATA_LAST_FRAG: + if (!first_frag) + return NULL; + else + goto done; + break; + default: return NULL; } @@ -962,20 +978,43 @@ static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list, __u16 needed) { __u16 freed = 0; - __u32 tsn; - struct sk_buff *skb; + __u32 tsn, last_tsn; + struct sk_buff *skb, *flist, *last; struct sctp_ulpevent *event; struct sctp_tsnmap *tsnmap; tsnmap = &ulpq->asoc->peer.tsn_map; - while ((skb = __skb_dequeue_tail(list)) != NULL) { - freed += skb_headlen(skb); + while ((skb = skb_peek_tail(list)) != NULL) { event = sctp_skb2event(skb); tsn = event->tsn; + /* Don't renege below the Cumulative TSN ACK Point. */ + if (TSN_lte(tsn, sctp_tsnmap_get_ctsn(tsnmap))) + break; + + /* Events in ordering queue may have multiple fragments + * corresponding to additional TSNs. Sum the total + * freed space; find the last TSN. 
+ */ + freed += skb_headlen(skb); + flist = skb_shinfo(skb)->frag_list; + for (last = flist; flist; flist = flist->next) { + last = flist; + freed += skb_headlen(last); + } + if (last) + last_tsn = sctp_skb2event(last)->tsn; + else + last_tsn = tsn; + + /* Unlink the event, then renege all applicable TSNs. */ + __skb_unlink(skb, list); sctp_ulpevent_free(event); - sctp_tsnmap_renege(tsnmap, tsn); + while (TSN_lte(tsn, last_tsn)) { + sctp_tsnmap_renege(tsnmap, tsn); + tsn++; + } if (freed >= needed) return freed; } @@ -1002,16 +1041,28 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event; struct sctp_association *asoc; struct sctp_sock *sp; + __u32 ctsn; + struct sk_buff *skb; asoc = ulpq->asoc; sp = sctp_sk(asoc->base.sk); /* If the association is already in Partial Delivery mode - * we have noting to do. + * we have nothing to do. */ if (ulpq->pd_mode) return; + /* Data must be at or below the Cumulative TSN ACK Point to + * start partial delivery. + */ + skb = skb_peek(&asoc->ulpq.reasm); + if (skb != NULL) { + ctsn = sctp_skb2event(skb)->tsn; + if (!TSN_lte(ctsn, sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map))) + return; + } + /* If the user enabled fragment interleave socket option, * multiple associations can enter partial delivery. * Otherwise, we can only enter partial delivery if the @@ -1054,12 +1105,16 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, } /* If able to free enough room, accept this chunk. */ if (chunk && (freed >= needed)) { - __u32 tsn; - tsn = ntohl(chunk->subh.data_hdr->tsn); - sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); - sctp_ulpq_tail_data(ulpq, chunk, gfp); - - sctp_ulpq_partial_delivery(ulpq, gfp); + int retval; + retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); + /* + * Enter partial delivery if chunk has not been + * delivered; otherwise, drain the reassembly queue. 
+ */ + if (retval <= 0) + sctp_ulpq_partial_delivery(ulpq, gfp); + else if (retval == 1) + sctp_ulpq_reasm_drain(ulpq); } sk_mem_reclaim(asoc->base.sk); diff --git a/net/socket.c b/net/socket.c index 2ca51c7..88f759a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -69,7 +69,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> -#include <linux/wanrouter.h> #include <linux/if_bridge.h> #include <linux/if_frad.h> #include <linux/if_vlan.h> @@ -370,16 +369,15 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); - if (unlikely(!file)) { + if (unlikely(IS_ERR(file))) { /* drop dentry, keep inode */ ihold(path.dentry->d_inode); path_put(&path); - return ERR_PTR(-ENFILE); + return file; } sock->file = file; file->f_flags = O_RDWR | (flags & O_NONBLOCK); - file->f_pos = 0; file->private_data = sock; return file; } @@ -2838,7 +2836,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) } ifr = compat_alloc_user_space(buf_size); - rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); + rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8); if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; @@ -2862,12 +2860,12 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) offsetof(struct ethtool_rxnfc, fs.ring_cookie)); if (copy_in_user(rxnfc, compat_rxnfc, - (void *)(&rxnfc->fs.m_ext + 1) - - (void *)rxnfc) || + (void __user *)(&rxnfc->fs.m_ext + 1) - + (void __user *)rxnfc) || copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, - (void *)(&rxnfc->fs.location + 1) - - (void *)&rxnfc->fs.ring_cookie) || + (void __user *)(&rxnfc->fs.location + 1) - + (void __user *)&rxnfc->fs.ring_cookie) || copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, sizeof(rxnfc->rule_cnt))) return -EFAULT; @@ -2879,12 +2877,12 @@ static int ethtool_ioctl(struct net *net, 
struct compat_ifreq __user *ifr32) if (convert_out) { if (copy_in_user(compat_rxnfc, rxnfc, - (const void *)(&rxnfc->fs.m_ext + 1) - - (const void *)rxnfc) || + (const void __user *)(&rxnfc->fs.m_ext + 1) - + (const void __user *)rxnfc) || copy_in_user(&compat_rxnfc->fs.ring_cookie, &rxnfc->fs.ring_cookie, - (const void *)(&rxnfc->fs.location + 1) - - (const void *)&rxnfc->fs.ring_cookie) || + (const void __user *)(&rxnfc->fs.location + 1) - + (const void __user *)&rxnfc->fs.ring_cookie) || copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, sizeof(rxnfc->rule_cnt))) return -EFAULT; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 03d03e3..516fe2c 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -10,7 +10,7 @@ config SUNRPC_BACKCHANNEL config SUNRPC_XPRT_RDMA tristate - depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL + depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS default SUNRPC && INFINIBAND help This option allows the NFS client and server to support diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d11418f..a622ad6 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -17,7 +17,8 @@ */ #include <net/ipv6.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> +#include <linux/sunrpc/msg_prot.h> #include <linux/slab.h> #include <linux/export.h> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index b5c067b..f529404 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -407,15 +407,14 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, { LIST_HEAD(free); struct rpc_cred_cache *cache = auth->au_credcache; - struct hlist_node *pos; struct rpc_cred *cred = NULL, *entry, *new; unsigned int nr; - nr = hash_long(acred->uid, cache->hashbits); + nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits); rcu_read_lock(); - hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { + hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], 
cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; spin_lock(&cache->lock); @@ -439,7 +438,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, } spin_lock(&cache->lock); - hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + hlist_for_each_entry(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; cred = get_rpccred(entry); @@ -519,8 +518,8 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { - .uid = 0, - .gid = 0, + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, }; dprintk("RPC: %5u looking up %s cred\n", diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 6ed6f20..b6badaf 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -18,8 +18,8 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define RPC_MACHINE_CRED_USERID ((uid_t)0) -#define RPC_MACHINE_CRED_GROUPID ((gid_t)0) +#define RPC_MACHINE_CRED_USERID GLOBAL_ROOT_UID +#define RPC_MACHINE_CRED_GROUPID GLOBAL_ROOT_GID struct generic_cred { struct rpc_cred gc_base; @@ -96,7 +96,9 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: allocated %s cred %p for uid %d gid %d\n", gcred->acred.machine_cred ? 
"machine" : "generic", - gcred, acred->uid, acred->gid); + gcred, + from_kuid(&init_user_ns, acred->uid), + from_kgid(&init_user_ns, acred->gid)); return &gcred->gc_base; } @@ -129,8 +131,8 @@ machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flag { if (!gcred->acred.machine_cred || gcred->acred.principal != acred->principal || - gcred->acred.uid != acred->uid || - gcred->acred.gid != acred->gid) + !uid_eq(gcred->acred.uid, acred->uid) || + !gid_eq(gcred->acred.gid, acred->gid)) return 0; return 1; } @@ -147,8 +149,8 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) if (acred->machine_cred) return machine_cred_match(acred, gcred, flags); - if (gcred->acred.uid != acred->uid || - gcred->acred.gid != acred->gid || + if (!uid_eq(gcred->acred.uid, acred->uid) || + !gid_eq(gcred->acred.gid, acred->gid) || gcred->acred.machine_cred != 0) goto out_nomatch; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6e5c824..5257d29 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -247,8 +247,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct __func__, ctx->gc_expiry, now, timeout); return q; err: - dprintk("RPC: %s returns %ld gc_expiry %lu now %lu timeout %u\n", - __func__, -PTR_ERR(p), ctx->gc_expiry, now, timeout); + dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p)); return p; } @@ -256,7 +255,7 @@ err: struct gss_upcall_msg { atomic_t count; - uid_t uid; + kuid_t uid; struct rpc_pipe_msg msg; struct list_head list; struct gss_auth *auth; @@ -303,11 +302,11 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) } static struct gss_upcall_msg * -__gss_find_upcall(struct rpc_pipe *pipe, uid_t uid) +__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid) { struct gss_upcall_msg *pos; list_for_each_entry(pos, &pipe->in_downcall, list) { - if (pos->uid != uid) + if (!uid_eq(pos->uid, uid)) continue; atomic_inc(&pos->count); 
dprintk("RPC: %s found msg %p\n", __func__, pos); @@ -395,8 +394,11 @@ gss_upcall_callback(struct rpc_task *task) static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) { - gss_msg->msg.data = &gss_msg->uid; - gss_msg->msg.len = sizeof(gss_msg->uid); + uid_t uid = from_kuid(&init_user_ns, gss_msg->uid); + memcpy(gss_msg->databuf, &uid, sizeof(uid)); + gss_msg->msg.data = gss_msg->databuf; + gss_msg->msg.len = sizeof(uid); + BUG_ON(sizeof(uid) > UPCALL_BUF_LEN); } static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, @@ -409,7 +411,7 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ", mech->gm_name, - gss_msg->uid); + from_kuid(&init_user_ns, gss_msg->uid)); p += gss_msg->msg.len; if (clnt->cl_principal) { len = sprintf(p, "target=%s ", clnt->cl_principal); @@ -445,7 +447,7 @@ static void gss_encode_msg(struct gss_upcall_msg *gss_msg, static struct gss_upcall_msg * gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, - uid_t uid, const char *service_name) + kuid_t uid, const char *service_name) { struct gss_upcall_msg *gss_msg; int vers; @@ -475,7 +477,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_new, *gss_msg; - uid_t uid = cred->cr_uid; + kuid_t uid = cred->cr_uid; gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal); if (IS_ERR(gss_new)) @@ -517,7 +519,7 @@ gss_refresh_upcall(struct rpc_task *task) int err = 0; dprintk("RPC: %5u %s for uid %u\n", - task->tk_pid, __func__, cred->cr_uid); + task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid)); gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we @@ -549,7 +551,8 @@ gss_refresh_upcall(struct rpc_task *task) gss_release_msg(gss_msg); out: dprintk("RPC: 
%5u %s for uid %u result %d\n", - task->tk_pid, __func__, cred->cr_uid, err); + task->tk_pid, __func__, + from_kuid(&init_user_ns, cred->cr_uid), err); return err; } @@ -562,7 +565,8 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) DEFINE_WAIT(wait); int err = 0; - dprintk("RPC: %s for uid %u\n", __func__, cred->cr_uid); + dprintk("RPC: %s for uid %u\n", + __func__, from_kuid(&init_user_ns, cred->cr_uid)); retry: gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { @@ -604,7 +608,7 @@ out_intr: gss_release_msg(gss_msg); out: dprintk("RPC: %s for uid %u result %d\n", - __func__, cred->cr_uid, err); + __func__, from_kuid(&init_user_ns, cred->cr_uid), err); return err; } @@ -616,9 +620,10 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct gss_upcall_msg *gss_msg; - struct rpc_pipe *pipe = RPC_I(filp->f_dentry->d_inode)->pipe; + struct rpc_pipe *pipe = RPC_I(file_inode(filp))->pipe; struct gss_cl_ctx *ctx; - uid_t uid; + uid_t id; + kuid_t uid; ssize_t err = -EFBIG; if (mlen > MSG_BUF_MAXSIZE) @@ -633,12 +638,18 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) goto err; end = (const void *)((char *)buf + mlen); - p = simple_get_bytes(buf, end, &uid, sizeof(uid)); + p = simple_get_bytes(buf, end, &id, sizeof(id)); if (IS_ERR(p)) { err = PTR_ERR(p); goto err; } + uid = make_kuid(&init_user_ns, id); + if (!uid_valid(uid)) { + err = -EINVAL; + goto err; + } + err = -ENOMEM; ctx = gss_alloc_context(); if (ctx == NULL) @@ -1059,7 +1070,8 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) int err = -ENOMEM; dprintk("RPC: %s for uid %d, flavor %d\n", - __func__, acred->uid, auth->au_flavor); + __func__, from_kuid(&init_user_ns, acred->uid), + auth->au_flavor); if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) goto out_err; @@ -1115,7 +1127,7 @@ out: } if (gss_cred->gc_principal != NULL) 
return 0; - return rc->cr_uid == acred->uid; + return uid_eq(rc->cr_uid, acred->uid); } /* @@ -1154,7 +1166,7 @@ gss_marshal(struct rpc_task *task, __be32 *p) /* We compute the checksum for the verifier over the xdr-encoded bytes * starting with the xid and ending at the end of the credential: */ - iov.iov_base = xprt_skip_transport_header(task->tk_xprt, + iov.iov_base = xprt_skip_transport_header(req->rq_xprt, req->rq_snd_buf.head[0].iov_base); iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; xdr_buf_from_iov(&iov, &verf_buf); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 107c452..88edec9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -574,6 +574,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; + /* Trim off the checksum blob */ + xdr_buf_trim(buf, GSS_KRB5_TOK_HDR_LEN + tailskip); return GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index b174fcd..f0f4eee 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -140,7 +140,7 @@ gss_mech_get(struct gss_api_mech *gm) EXPORT_SYMBOL_GPL(gss_mech_get); -struct gss_api_mech * +static struct gss_api_mech * _gss_mech_get_by_name(const char *name) { struct gss_api_mech *pos, *gm = NULL; @@ -205,7 +205,7 @@ mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) return 0; } -struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor) +static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor) { struct gss_api_mech *gm = NULL, *pos; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73e9573..5ead605 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -182,12 +182,6 @@ static void 
rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, rsi_request); -} - - static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) { @@ -275,7 +269,7 @@ static struct cache_detail rsi_cache_template = { .hash_size = RSI_HASHMAX, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_upcall = rsi_upcall, + .cache_request = rsi_request, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, @@ -418,6 +412,7 @@ static int rsc_parse(struct cache_detail *cd, { /* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */ char *buf = mesg; + int id; int len, rv; struct rsc rsci, *rscp = NULL; time_t expiry; @@ -444,7 +439,7 @@ static int rsc_parse(struct cache_detail *cd, goto out; /* uid, or NEGATIVE */ - rv = get_int(&mesg, &rsci.cred.cr_uid); + rv = get_int(&mesg, &id); if (rv == -EINVAL) goto out; if (rv == -ENOENT) @@ -452,9 +447,21 @@ static int rsc_parse(struct cache_detail *cd, else { int N, i; + /* + * NOTE: we skip uid_valid()/gid_valid() checks here: + * instead, * -1 id's are later mapped to the + * (export-specific) anonymous id by nfsd_setuser. + * + * (But supplementary gid's get no such special + * treatment so are checked for validity here.) 
+ */ + /* uid */ + rsci.cred.cr_uid = make_kuid(&init_user_ns, id); + /* gid */ - if (get_int(&mesg, &rsci.cred.cr_gid)) + if (get_int(&mesg, &id)) goto out; + rsci.cred.cr_gid = make_kgid(&init_user_ns, id); /* number of additional gid's */ if (get_int(&mesg, &N)) @@ -467,11 +474,10 @@ static int rsc_parse(struct cache_detail *cd, /* gid's */ status = -EINVAL; for (i=0; i<N; i++) { - gid_t gid; kgid_t kgid; - if (get_int(&mesg, &gid)) + if (get_int(&mesg, &id)) goto out; - kgid = make_kgid(&init_user_ns, gid); + kgid = make_kgid(&init_user_ns, id); if (!gid_valid(kgid)) goto out; GROUP_AT(rsci.cred.cr_group_info, i) = kgid; @@ -817,13 +823,17 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) * The server uses base of head iovec as read pointer, while the * client uses separate pointer. */ static int -unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) { int stat = -EINVAL; u32 integ_len, maj_stat; struct xdr_netobj mic; struct xdr_buf integ_buf; + /* Did we already verify the signature on the original pass through? */ + if (rqstp->rq_deferred) + return 0; + integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) return stat; @@ -846,6 +856,8 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; if (svc_getnl(&buf->head[0]) != seq) goto out; + /* trim off the mic at the end before returning */ + xdr_buf_trim(buf, mic.len + 4); stat = 0; out: kfree(mic.data); @@ -1190,7 +1202,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) /* placeholders for length and seq. 
number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - if (unwrap_integ_data(&rqstp->rq_arg, + if (unwrap_integ_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; break; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 52c5abd..dc37021 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -18,8 +18,8 @@ struct unx_cred { struct rpc_cred uc_base; - gid_t uc_gid; - gid_t uc_gids[NFS_NGROUPS]; + kgid_t uc_gid; + kgid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -65,7 +65,8 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) unsigned int i; dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", - acred->uid, acred->gid); + from_kuid(&init_user_ns, acred->uid), + from_kgid(&init_user_ns, acred->gid)); if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS))) return ERR_PTR(-ENOMEM); @@ -79,13 +80,10 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) groups = NFS_NGROUPS; cred->uc_gid = acred->gid; - for (i = 0; i < groups; i++) { - gid_t gid; - gid = from_kgid(&init_user_ns, GROUP_AT(acred->group_info, i)); - cred->uc_gids[i] = gid; - } + for (i = 0; i < groups; i++) + cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) - cred->uc_gids[i] = NOGROUP; + cred->uc_gids[i] = INVALID_GID; return &cred->uc_base; } @@ -123,21 +121,17 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) unsigned int i; - if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid) + if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid)) return 0; if (acred->group_info != NULL) groups = acred->group_info->ngroups; if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; - for (i = 0; i < groups ; i++) { - gid_t gid; - gid = from_kgid(&init_user_ns, GROUP_AT(acred->group_info, i)); - if (cred->uc_gids[i] != gid) + for (i = 0; i < groups ; i++) + if (!gid_eq(cred->uc_gids[i], GROUP_AT(acred->group_info, i))) return 0; - } - if (groups 
< NFS_NGROUPS && - cred->uc_gids[groups] != NOGROUP) + if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups])) return 0; return 1; } @@ -163,11 +157,11 @@ unx_marshal(struct rpc_task *task, __be32 *p) */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); - *p++ = htonl((u32) cred->uc_uid); - *p++ = htonl((u32) cred->uc_gid); + *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid)); + *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid)); hold = p++; - for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) - *p++ = htonl((u32) cred->uc_gids[i]); + for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++) + *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i])); *hold = htonl(p - hold - 1); /* gid array length */ *base = htonl((p - base - 1) << 2); /* cred length */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 9afa439..25d58e76 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -196,9 +196,9 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_update); static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) { - if (!cd->cache_upcall) - return -EINVAL; - return cd->cache_upcall(cd, h); + if (cd->cache_upcall) + return cd->cache_upcall(cd, h); + return sunrpc_cache_pipe_upcall(cd, h); } static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) @@ -670,13 +670,13 @@ static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; struct list_head pending; - struct hlist_node *lp, *tmp; + struct hlist_node *tmp; int hash = DFR_HASH(item); INIT_LIST_HEAD(&pending); spin_lock(&cache_defer_lock); - hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash) + hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash) if (dreq->item == item) { __unhash_deferred_req(dreq); list_add(&dreq->recent, &pending); @@ -750,12 +750,24 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; +static int 
cache_request(struct cache_detail *detail, + struct cache_request *crq) +{ + char *bp = crq->buf; + int len = PAGE_SIZE; + + detail->cache_request(detail, crq->item, &bp, &len); + if (len < 0) + return -EAGAIN; + return PAGE_SIZE - len; +} + static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int err; if (count == 0) @@ -784,6 +796,13 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, rq->readers++; spin_unlock(&queue_lock); + if (rq->len == 0) { + err = cache_request(cd, rq); + if (err < 0) + goto out; + rq->len = err; + } + if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; spin_lock(&queue_lock); @@ -886,7 +905,7 @@ static ssize_t cache_write(struct file *filp, const char __user *buf, struct cache_detail *cd) { struct address_space *mapping = filp->f_mapping; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); ssize_t ret = -EINVAL; if (!cd->cache_parse) @@ -1140,17 +1159,14 @@ static bool cache_listeners_exist(struct cache_detail *detail) * * Each request is at most one page long. 
*/ -int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { char *buf; struct cache_request *crq; - char *bp; - int len; + + if (!detail->cache_request) + return -EINVAL; if (!cache_listeners_exist(detail)) { warn_no_listener(detail); @@ -1167,19 +1183,10 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, return -EAGAIN; } - bp = buf; len = PAGE_SIZE; - - cache_request(detail, h, &bp, &len); - - if (len < 0) { - kfree(buf); - kfree(crq); - return -EAGAIN; - } crq->q.reader = 0; crq->item = cache_get(h); crq->buf = buf; - crq->len = PAGE_SIZE - len; + crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); list_add_tail(&crq->q.list, &detail->queue); @@ -1454,7 +1461,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf, static ssize_t cache_read_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return cache_read(filp, buf, count, ppos, cd); } @@ -1462,14 +1469,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf, static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return cache_write(filp, buf, count, ppos, cd); } static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return cache_poll(filp, wait, cd); } @@ -1477,7 +1484,7 @@ static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) static long 
cache_ioctl_procfs(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct cache_detail *cd = PDE(inode)->data; return cache_ioctl(inode, filp, cmd, arg, cd); @@ -1546,7 +1553,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp) static ssize_t read_flush_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return read_flush(filp, buf, count, ppos, cd); } @@ -1555,7 +1562,7 @@ static ssize_t write_flush_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return write_flush(filp, buf, count, ppos, cd); } @@ -1605,7 +1612,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) if (p == NULL) goto out_nomem; - if (cd->cache_upcall || cd->cache_parse) { + if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, cd->u.procfs.proc_ent, &cache_file_operations_procfs, cd); @@ -1614,7 +1621,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) goto out_nomem; } if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + p = proc_create_data("content", S_IFREG|S_IRUSR, cd->u.procfs.proc_ent, &content_file_operations_procfs, cd); cd->u.procfs.content_ent = p; @@ -1686,7 +1693,7 @@ EXPORT_SYMBOL_GPL(cache_destroy_net); static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_read(filp, buf, count, ppos, cd); } @@ -1694,14 +1701,14 @@ static 
ssize_t cache_read_pipefs(struct file *filp, char __user *buf, static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_write(filp, buf, count, ppos, cd); } static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_poll(filp, wait, cd); } @@ -1709,7 +1716,7 @@ static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) static long cache_ioctl_pipefs(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct cache_detail *cd = RPC_I(inode)->private; return cache_ioctl(inode, filp, cmd, arg, cd); @@ -1778,7 +1785,7 @@ static int release_flush_pipefs(struct inode *inode, struct file *filp) static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return read_flush(filp, buf, count, ppos, cd); } @@ -1787,7 +1794,7 @@ static ssize_t write_flush_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return write_flush(filp, buf, count, ppos, cd); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 822f020..d5f35f1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -33,6 +33,7 @@ #include <linux/rcupdate.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/metrics.h> #include 
<linux/sunrpc/bc_xprt.h> @@ -303,10 +304,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru err = rpciod_up(); if (err) goto out_no_rpciod; - err = -EINVAL; - if (!xprt) - goto out_no_xprt; + err = -EINVAL; if (args->version >= program->nrvers) goto out_err; version = program->version[args->version]; @@ -381,10 +380,9 @@ out_no_principal: out_no_stats: kfree(clnt); out_err: - xprt_put(xprt); -out_no_xprt: rpciod_down(); out_no_rpciod: + xprt_put(xprt); return ERR_PTR(err); } @@ -511,7 +509,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new = rpc_new_client(args, xprt); if (IS_ERR(new)) { err = PTR_ERR(new); - goto out_put; + goto out_err; } atomic_inc(&clnt->cl_count); @@ -524,8 +522,6 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_chatty = clnt->cl_chatty; return new; -out_put: - xprt_put(xprt); out_err: dprintk("RPC: %s: returned error %d\n", __func__, err); return ERR_PTR(err); @@ -555,7 +551,7 @@ EXPORT_SYMBOL_GPL(rpc_clone_client); * rpc_clone_client_set_auth - Clone an RPC client structure and set its auth * * @clnt: RPC client whose parameters are copied - * @auth: security flavor for new client + * @flavor: security flavor for new client * * Returns a fresh RPC client or an ERR_PTR. */ @@ -610,11 +606,6 @@ EXPORT_SYMBOL_GPL(rpc_killall_tasks); */ void rpc_shutdown_client(struct rpc_clnt *clnt) { - /* - * To avoid deadlock, never call rpc_shutdown_client from a - * workqueue context! 
- */ - WARN_ON_ONCE(current->flags & PF_WQ_WORKER); might_sleep(); dprintk_rcu("RPC: shutting down %s client for %s\n", @@ -1201,6 +1192,21 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) EXPORT_SYMBOL_GPL(rpc_max_payload); /** + * rpc_get_timeout - Get timeout for transport in units of HZ + * @clnt: RPC client to query + */ +unsigned long rpc_get_timeout(struct rpc_clnt *clnt) +{ + unsigned long ret; + + rcu_read_lock(); + ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval; + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_get_timeout); + +/** * rpc_force_rebind - force transport to check that remote port is unchanged * @clnt: client to rebind * @@ -1405,7 +1411,7 @@ call_allocate(struct rpc_task *task) { unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = req->rq_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; dprint_status(task); @@ -1513,7 +1519,7 @@ rpc_xdr_encode(struct rpc_task *task) static void call_bind(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; dprint_status(task); @@ -1607,7 +1613,7 @@ retry_timeout: static void call_connect(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; dprintk("RPC: %5u call_connect xprt %p %s connected\n", task->tk_pid, xprt, @@ -1690,7 +1696,7 @@ call_transmit(struct rpc_task *task) if (rpc_reply_expected(task)) return; task->tk_action = rpc_exit_task; - rpc_wake_up_queued_task(&task->tk_xprt->pending, task); + rpc_wake_up_queued_task(&task->tk_rqstp->rq_xprt->pending, task); } /* @@ -1789,7 +1795,7 @@ call_bc_transmit(struct rpc_task *task) */ printk(KERN_NOTICE "RPC: Could not send backchannel reply " "error: %d\n", task->tk_status); - xprt_conditional_disconnect(task->tk_xprt, + xprt_conditional_disconnect(req->rq_xprt, 
req->rq_connect_cookie); break; default: @@ -1841,7 +1847,7 @@ call_status(struct rpc_task *task) case -ETIMEDOUT: task->tk_action = call_timeout; if (task->tk_client->cl_discrtry) - xprt_conditional_disconnect(task->tk_xprt, + xprt_conditional_disconnect(req->rq_xprt, req->rq_connect_cookie); break; case -ECONNRESET: @@ -1996,7 +2002,7 @@ out_retry: if (task->tk_rqstp == req) { req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) - xprt_conditional_disconnect(task->tk_xprt, + xprt_conditional_disconnect(req->rq_xprt, req->rq_connect_cookie); } } @@ -2010,7 +2016,7 @@ rpc_encode_header(struct rpc_task *task) /* FIXME: check buffer size? */ - p = xprt_skip_transport_header(task->tk_xprt, p); + p = xprt_skip_transport_header(req->rq_xprt, p); *p++ = req->rq_xid; /* XID */ *p++ = htonl(RPC_CALL); /* CALL */ *p++ = htonl(RPC_VERSION); /* RPC version */ diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index fd10981..a9129f8 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -284,7 +284,7 @@ out: static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int res = 0; @@ -328,7 +328,7 @@ out_unlock: static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int res; mutex_lock(&inode->i_mutex); @@ -342,7 +342,7 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of static unsigned int rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct rpc_inode *rpci = RPC_I(inode); unsigned int mask = POLLOUT | POLLWRNORM; @@ -360,7 +360,7 @@ rpc_pipe_poll(struct file 
*filp, struct poll_table_struct *wait) static long rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; int len; @@ -830,7 +830,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * responses to upcalls. They will result in calls to @msg->downcall. * * The @private argument passed here will be available to all these methods - * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. + * from the file pointer, via RPC_I(file_inode(file))->private. */ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, void *private, struct rpc_pipe *pipe) @@ -1174,6 +1174,8 @@ static struct file_system_type rpc_pipe_fs_type = { .mount = rpc_mount, .kill_sb = rpc_kill_sb, }; +MODULE_ALIAS_FS("rpc_pipefs"); +MODULE_ALIAS("rpc_pipefs"); static void init_once(void *foo) @@ -1218,6 +1220,3 @@ void unregister_rpc_pipefs(void) kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } - -/* Make 'mount -t rpc_pipefs ...' autoload this module. 
*/ -MODULE_ALIAS("rpc_pipefs"); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 795a0f4..3df764d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -26,6 +26,7 @@ #include <net/ipv6.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xprtsock.h> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d17a704..f8529fc 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -98,9 +98,25 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } +static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue) +{ + struct list_head *q = &queue->tasks[queue->priority]; + struct rpc_task *task; + + if (!list_empty(q)) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + if (task->tk_owner == queue->owner) + list_move_tail(&task->u.tk_wait.list, q); + } +} + static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) { - queue->priority = priority; + if (queue->priority != priority) { + /* Fairness: rotate the list when changing priority */ + rpc_rotate_queue_owner(queue); + queue->priority = priority; + } } static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) @@ -164,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -414,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if 
(task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } /* @@ -934,16 +955,35 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) return task; } +/* + * rpc_free_task - release rpc task and perform cleanups + * + * Note that we free up the rpc_task _after_ rpc_release_calldata() + * in order to work around a workqueue dependency issue. + * + * Tejun Heo states: + * "Workqueue currently considers two work items to be the same if they're + * on the same address and won't execute them concurrently - ie. it + * makes a work item which is queued again while being executed wait + * for the previous execution to complete. + * + * If a work function frees the work item, and then waits for an event + * which should be performed by another work item and *that* work item + * recycles the freed work item, it can create a false dependency loop. + * There really is no reliable way to detect this short of verifying + * every memory free." + * + */ static void rpc_free_task(struct rpc_task *task) { - const struct rpc_call_ops *tk_ops = task->tk_ops; - void *calldata = task->tk_calldata; + unsigned short tk_flags = task->tk_flags; + + rpc_release_calldata(task->tk_ops, task->tk_calldata); - if (task->tk_flags & RPC_TASK_DYNAMIC) { + if (tk_flags & RPC_TASK_DYNAMIC) { dprintk("RPC: %5u freeing task\n", task->tk_pid); mempool_free(task, rpc_task_mempool); } - rpc_release_calldata(tk_ops, calldata); } static void rpc_async_release(struct work_struct *work) @@ -953,8 +993,7 @@ static void rpc_async_release(struct work_struct *work) static void rpc_release_resources_task(struct rpc_task *task) { - if (task->tk_rqstp) - xprt_release(task); + xprt_release(task); if (task->tk_msg.rpc_cred) { put_rpccred(task->tk_msg.rpc_cred); task->tk_msg.rpc_cred = NULL; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dbf12ac..89a588b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -515,15 +515,6 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); void 
svc_shutdown_net(struct svc_serv *serv, struct net *net) { - /* - * The set of xprts (contained in the sv_tempsocks and - * sv_permsocks lists) is now constant, since it is modified - * only by accepting new sockets (done by service threads in - * svc_recv) or aging old ones (done by sv_temptimer), or - * configuration changes (excluded by whatever locking the - * caller is using--nfsd_mutex in the case of nfsd). So it's - * safe to traverse those lists and shut everything down: - */ svc_close_net(serv, net); if (serv->sv_shutdown) @@ -1042,6 +1033,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) /* * dprintk the given error with the address of the client that caused it. */ +#ifdef RPC_DEBUG static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { @@ -1058,6 +1050,9 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) va_end(args); } +#else +static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} +#endif /* * Common routine for processing the RPC request. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b8e47fa..80a6640 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -499,7 +499,8 @@ void svc_wake_up(struct svc_serv *serv) rqstp->rq_xprt = NULL; */ wake_up(&rqstp->rq_wait); - } + } else + pool->sp_task_pending = 1; spin_unlock_bh(&pool->sp_lock); } } @@ -634,7 +635,13 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) * long for cache updates. */ rqstp->rq_chandle.thread_wait = 1*HZ; + pool->sp_task_pending = 0; } else { + if (pool->sp_task_pending) { + pool->sp_task_pending = 0; + spin_unlock_bh(&pool->sp_lock); + return ERR_PTR(-EAGAIN); + } /* No data pending. 
Go to sleep */ svc_thread_enqueue(pool, rqstp); @@ -856,7 +863,6 @@ static void svc_age_temp_xprts(unsigned long closure) struct svc_serv *serv = (struct svc_serv *)closure; struct svc_xprt *xprt; struct list_head *le, *next; - LIST_HEAD(to_be_aged); dprintk("svc_age_temp_xprts\n"); @@ -877,25 +883,15 @@ static void svc_age_temp_xprts(unsigned long closure) if (atomic_read(&xprt->xpt_ref.refcount) > 1 || test_bit(XPT_BUSY, &xprt->xpt_flags)) continue; - svc_xprt_get(xprt); - list_move(le, &to_be_aged); + list_del_init(le); set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_DETACHED, &xprt->xpt_flags); - } - spin_unlock_bh(&serv->sv_lock); - - while (!list_empty(&to_be_aged)) { - le = to_be_aged.next; - /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */ - list_del_init(le); - xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("queuing xprt %p for closing\n", xprt); /* a thread will dequeue and close it soon */ svc_xprt_enqueue(xprt); - svc_xprt_put(xprt); } + spin_unlock_bh(&serv->sv_lock); mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } @@ -959,21 +955,24 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; + int ret = 0; spin_lock(&serv->sv_lock); list_for_each_entry(xprt, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; + ret++; set_bit(XPT_CLOSE, &xprt->xpt_flags); - set_bit(XPT_BUSY, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); } spin_unlock(&serv->sv_lock); + return ret; } -static void svc_clear_pools(struct svc_serv *serv, struct net *net) +static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net) { struct svc_pool *pool; struct svc_xprt *xprt; @@ -988,42 +987,46 @@ static void svc_clear_pools(struct svc_serv *serv, struct net *net) if 
(xprt->xpt_net != net) continue; list_del_init(&xprt->xpt_ready); + spin_unlock_bh(&pool->sp_lock); + return xprt; } spin_unlock_bh(&pool->sp_lock); } + return NULL; } -static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) { struct svc_xprt *xprt; - struct svc_xprt *tmp; - LIST_HEAD(victims); - spin_lock(&serv->sv_lock); - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { - if (xprt->xpt_net != net) - continue; - list_move(&xprt->xpt_list, &victims); - } - spin_unlock(&serv->sv_lock); - - list_for_each_entry_safe(xprt, tmp, &victims, xpt_list) + while ((xprt = svc_dequeue_net(serv, net))) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_delete_xprt(xprt); + } } +/* + * Server threads may still be running (especially in the case where the + * service is still running in other network namespaces). + * + * So we shut down sockets the same way we would on a running server, by + * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do + * the close. In the case there are no such other threads, + * threads running, svc_clean_up_xprts() does a simple version of a + * server's main event loop, and in the case where there are other + * threads, we may need to wait a little while and then check again to + * see if they're done. + */ void svc_close_net(struct svc_serv *serv, struct net *net) { - svc_close_list(serv, &serv->sv_tempsocks, net); - svc_close_list(serv, &serv->sv_permsocks, net); + int delay = 0; - svc_clear_pools(serv, net); - /* - * At this point the sp_sockets lists will stay empty, since - * svc_xprt_enqueue will not add new entries without taking the - * sp_lock and checking XPT_BUSY. 
- */ - svc_clear_list(serv, &serv->sv_tempsocks, net); - svc_clear_list(serv, &serv->sv_permsocks, net); + while (svc_close_list(serv, &serv->sv_permsocks, net) + + svc_close_list(serv, &serv->sv_tempsocks, net)) { + + svc_clean_up_xprts(serv, net); + msleep(delay++); + } } /* diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 7963569..2af7b0c 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -138,13 +138,12 @@ auth_domain_lookup(char *name, struct auth_domain *new) { struct auth_domain *hp; struct hlist_head *head; - struct hlist_node *np; head = &auth_domain_table[hash_str(name, DN_HASHBITS)]; spin_lock(&auth_domain_lock); - hlist_for_each_entry(hp, np, head, hash) { + hlist_for_each_entry(hp, head, hash) { if (strcmp(hp->name, name)==0) { kref_get(&hp->ref); spin_unlock(&auth_domain_lock); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 4d01292..c3f9e1e 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -6,6 +6,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/addr.h> #include <linux/err.h> #include <linux/seq_file.h> #include <linux/hash.h> @@ -17,7 +18,6 @@ #include <linux/user_namespace.h> #define RPCDBG_FACILITY RPCDBG_AUTH -#include <linux/sunrpc/clnt.h> #include "netns.h" @@ -157,11 +157,6 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); -} - static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -415,10 +410,15 @@ svcauth_unix_info_release(struct svc_xprt *xpt) struct unix_gid { struct cache_head h; - uid_t uid; + kuid_t uid; struct group_info *gi; }; +static int unix_gid_hash(kuid_t 
uid) +{ + return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS); +} + static void unix_gid_put(struct kref *kref) { struct cache_head *item = container_of(kref, struct cache_head, ref); @@ -433,7 +433,7 @@ static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew) { struct unix_gid *orig = container_of(corig, struct unix_gid, h); struct unix_gid *new = container_of(cnew, struct unix_gid, h); - return orig->uid == new->uid; + return uid_eq(orig->uid, new->uid); } static void unix_gid_init(struct cache_head *cnew, struct cache_head *citem) { @@ -465,23 +465,19 @@ static void unix_gid_request(struct cache_detail *cd, char tuid[20]; struct unix_gid *ug = container_of(h, struct unix_gid, h); - snprintf(tuid, 20, "%u", ug->uid); + snprintf(tuid, 20, "%u", from_kuid(&init_user_ns, ug->uid)); qword_add(bpp, blen, tuid); (*bpp)[-1] = '\n'; } -static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); -} - -static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid); +static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid); static int unix_gid_parse(struct cache_detail *cd, char *mesg, int mlen) { /* uid expiry Ngid gid0 gid1 ... 
gidN-1 */ - int uid; + int id; + kuid_t uid; int gids; int rv; int i; @@ -493,9 +489,12 @@ static int unix_gid_parse(struct cache_detail *cd, return -EINVAL; mesg[mlen-1] = 0; - rv = get_int(&mesg, &uid); + rv = get_int(&mesg, &id); if (rv) return -EINVAL; + uid = make_kuid(&init_user_ns, id); + if (!uid_valid(uid)) + return -EINVAL; ug.uid = uid; expiry = get_expiry(&mesg); @@ -530,7 +529,7 @@ static int unix_gid_parse(struct cache_detail *cd, ug.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ug.h, &ugp->h, - hash_long(uid, GID_HASHBITS)); + unix_gid_hash(uid)); if (!ch) err = -ENOMEM; else { @@ -549,7 +548,7 @@ static int unix_gid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) { - struct user_namespace *user_ns = current_user_ns(); + struct user_namespace *user_ns = &init_user_ns; struct unix_gid *ug; int i; int glen; @@ -565,7 +564,7 @@ static int unix_gid_show(struct seq_file *m, else glen = 0; - seq_printf(m, "%u %d:", ug->uid, glen); + seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen); for (i = 0; i < glen; i++) seq_printf(m, " %d", from_kgid_munged(user_ns, GROUP_AT(ug->gi, i))); seq_printf(m, "\n"); @@ -577,7 +576,7 @@ static struct cache_detail unix_gid_cache_template = { .hash_size = GID_HASHMAX, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_upcall = unix_gid_upcall, + .cache_request = unix_gid_request, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, @@ -615,20 +614,20 @@ void unix_gid_cache_destroy(struct net *net) cache_destroy_net(cd, net); } -static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid) +static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid) { struct unix_gid ug; struct cache_head *ch; ug.uid = uid; - ch = sunrpc_cache_lookup(cd, &ug.h, hash_long(uid, GID_HASHBITS)); + ch = sunrpc_cache_lookup(cd, &ug.h, unix_gid_hash(uid)); if (ch) return container_of(ch, struct unix_gid, h); else return NULL; } 
-static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) +static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp) { struct unix_gid *ug; struct group_info *gi; @@ -750,8 +749,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) } /* Signal that mapping to nobody uid/gid is required */ - cred->cr_uid = (uid_t) -1; - cred->cr_gid = (gid_t) -1; + cred->cr_uid = INVALID_UID; + cred->cr_gid = INVALID_GID; cred->cr_group_info = groups_alloc(0); if (cred->cr_group_info == NULL) return SVC_CLOSE; /* kmalloc failure - client must retry */ @@ -812,8 +811,10 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */ argv->iov_len -= slen*4; - cred->cr_uid = svc_getnl(argv); /* uid */ - cred->cr_gid = svc_getnl(argv); /* gid */ + cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */ + cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */ + if (!uid_valid(cred->cr_uid) || !gid_valid(cred->cr_gid)) + goto badcred; slen = svc_getnl(argv); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0) goto badcred; @@ -874,7 +875,7 @@ static struct cache_detail ip_map_cache_template = { .hash_size = IP_HASHMAX, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_upcall = ip_map_upcall, + .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0a148c9..0f679df 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -465,7 +465,7 @@ static int svc_udp_get_dest_address4(struct svc_rqst *rqstp, } /* - * See net/ipv6/datagram.c : datagram_recv_ctl + * See net/ipv6/datagram.c : ip6_datagram_recv_ctl */ static int svc_udp_get_dest_address6(struct svc_rqst *rqstp, struct cmsghdr *cmh) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 5605563..75edcfa 100644 --- a/net/sunrpc/xdr.c +++ 
b/net/sunrpc/xdr.c @@ -879,6 +879,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); +/** + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" + * @buf: buf to be trimmed + * @len: number of bytes to reduce "buf" by + * + * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note + * that it's possible that we'll trim less than that amount if the xdr_buf is + * too small, or if (for instance) it's all in the head and the parser has + * already read too far into it. + */ +void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) +{ + size_t cur; + unsigned int trim = len; + + if (buf->tail[0].iov_len) { + cur = min_t(size_t, buf->tail[0].iov_len, trim); + buf->tail[0].iov_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->page_len) { + cur = min_t(unsigned int, buf->page_len, trim); + buf->page_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->head[0].iov_len) { + cur = min_t(size_t, buf->head[0].iov_len, trim); + buf->head[0].iov_len -= cur; + trim -= cur; + } +fix_len: + buf->len -= (len - trim); +} +EXPORT_SYMBOL_GPL(xdr_buf_trim); + static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index bd462a5..b7478d5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -430,21 +430,23 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) */ void xprt_release_rqst_cong(struct rpc_task *task) { - __xprt_put_cong(task->tk_xprt, task->tk_rqstp); + struct rpc_rqst *req = task->tk_rqstp; + + __xprt_put_cong(req->rq_xprt, req); } EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); /** * xprt_adjust_cwnd - adjust transport congestion window + * @xprt: pointer to xprt * @task: recently completed RPC request used to adjust window * @result: result code of completed RPC request * * We use a time-smoothed congestion estimator to avoid 
heavy oscillation. */ -void xprt_adjust_cwnd(struct rpc_task *task, int result) +void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_xprt *xprt = task->tk_xprt; unsigned long cwnd = xprt->cwnd; if (result >= 0 && cwnd <= xprt->cong) { @@ -485,13 +487,17 @@ EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); * xprt_wait_for_buffer_space - wait for transport output buffer to clear * @task: task to be put to sleep * @action: function pointer to be executed after wait + * + * Note that we only set the timer for the case of RPC_IS_SOFT(), since + * we don't in general want to force a socket disconnection due to + * an incomplete RPC call transmission. */ void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - task->tk_timeout = req->rq_timeout; + task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0; rpc_sleep_on(&xprt->pending, task, action); } EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); @@ -695,7 +701,7 @@ out_abort: */ void xprt_connect(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? 
"is" : "is not")); @@ -722,13 +728,13 @@ void xprt_connect(struct rpc_task *task) if (xprt_test_and_set_connecting(xprt)) return; xprt->stat.connect_start = jiffies; - xprt->ops->connect(task); + xprt->ops->connect(xprt, task); } } static void xprt_connect_status(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; if (task->tk_status == 0) { xprt->stat.connect_count++; @@ -832,7 +838,7 @@ static void xprt_timer(struct rpc_task *task) spin_lock_bh(&xprt->transport_lock); if (!req->rq_reply_bytes_recvd) { if (xprt->ops->timer) - xprt->ops->timer(task); + xprt->ops->timer(xprt, task); } else task->tk_status = 0; spin_unlock_bh(&xprt->transport_lock); @@ -1091,7 +1097,7 @@ EXPORT_SYMBOL_GPL(xprt_free); */ void xprt_reserve(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt; task->tk_status = 0; if (task->tk_rqstp != NULL) @@ -1099,7 +1105,10 @@ void xprt_reserve(struct rpc_task *task) task->tk_timeout = 0; task->tk_status = -EAGAIN; + rcu_read_lock(); + xprt = rcu_dereference(task->tk_client->cl_xprt); xprt->ops->alloc_slot(xprt, task); + rcu_read_unlock(); } static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) @@ -1136,10 +1145,18 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) void xprt_release(struct rpc_task *task) { struct rpc_xprt *xprt; - struct rpc_rqst *req; + struct rpc_rqst *req = task->tk_rqstp; - if (!(req = task->tk_rqstp)) + if (req == NULL) { + if (task->tk_client) { + rcu_read_lock(); + xprt = rcu_dereference(task->tk_client->cl_xprt); + if (xprt->snd_task == task) + xprt_release_write(xprt, task); + rcu_read_unlock(); + } return; + } xprt = req->rq_xprt; if (task->tk_ops->rpc_count_stats != NULL) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 558fbab..e03725b 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -171,7 +171,7 @@ 
rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) { struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); int nsegs, nchunks = 0; unsigned int pos; struct rpcrdma_mr_seg *seg = req->rl_segments; @@ -366,7 +366,7 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) int rpcrdma_marshal_req(struct rpc_rqst *rqst) { - struct rpc_xprt *xprt = rqst->rq_task->tk_xprt; + struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); char *base; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index c9aa7a3..794312f 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -51,6 +51,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/seq_file.h> +#include <linux/sunrpc/addr.h> #include "xprt_rdma.h" @@ -426,9 +427,8 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) } static void -xprt_rdma_connect(struct rpc_task *task) +xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); if (r_xprt->rx_ep.rep_connected != 0) { @@ -475,7 +475,7 @@ xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) static void * xprt_rdma_allocate(struct rpc_task *task, size_t size) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; struct rpcrdma_req *req, *nreq; req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); @@ -627,7 +627,7 @@ static int xprt_rdma_send_request(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct 
rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 745973b..93726560 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1086,7 +1086,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, case RPCRDMA_MEMWINDOWS: /* Allocate one extra request's worth, for full cycling */ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { - r->r.mw = ib_alloc_mw(ia->ri_pd); + r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1); if (IS_ERR(r->r.mw)) { rc = PTR_ERR(r->r.mw); dprintk("RPC: %s: ib_alloc_mw" @@ -1673,12 +1673,12 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, *nsegs = 1; rpcrdma_map_one(ia, seg, writing); - param.mr = ia->ri_bind_mem; + param.bind_info.mr = ia->ri_bind_mem; param.wr_id = 0ULL; /* no send cookie */ - param.addr = seg->mr_dma; - param.length = seg->mr_len; + param.bind_info.addr = seg->mr_dma; + param.bind_info.length = seg->mr_len; param.send_flags = 0; - param.mw_access_flags = mem_priv; + param.bind_info.mw_access_flags = mem_priv; DECR_CQCOUNT(&r_xprt->rx_ep); rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param); @@ -1690,7 +1690,7 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, rpcrdma_unmap_one(ia, seg); } else { seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; - seg->mr_base = param.addr; + seg->mr_base = param.bind_info.addr; seg->mr_nsegs = 1; } return rc; @@ -1706,10 +1706,10 @@ rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, int rc; BUG_ON(seg->mr_nsegs != 1); - param.mr = ia->ri_bind_mem; - param.addr = 0ULL; /* unbind */ - param.length = 0; - param.mw_access_flags = 0; + param.bind_info.mr = ia->ri_bind_mem; + param.bind_info.addr = 0ULL; /* unbind */ + param.bind_info.length = 0; + param.bind_info.mw_access_flags = 0; if (*r) { param.wr_id = (u64) (unsigned long) *r; param.send_flags = IB_SEND_SIGNALED; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h 
b/net/sunrpc/xprtrdma/xprt_rdma.h index 9a66c95..cc1445d 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -235,13 +235,13 @@ struct rpcrdma_create_data_internal { }; #define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ - (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize) + (rpcx_to_rdmad(rq->rq_xprt).inline_rsize) #define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ - (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize) + (rpcx_to_rdmad(rq->rq_xprt).inline_wsize) #define RPCRDMA_INLINE_PAD_VALUE(rq)\ - rpcx_to_rdmad(rq->rq_task->tk_xprt).padding + rpcx_to_rdmad(rq->rq_xprt).padding /* * Statistics for RPCRDMA diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 68b0a81..3d02130 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -33,6 +33,7 @@ #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprtsock.h> @@ -770,7 +771,7 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) goto out_release; if (req->rq_bytes_sent == req->rq_snd_buf.len) goto out_release; - set_bit(XPRT_CLOSE_WAIT, &task->tk_xprt->state); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); out_release: xprt_release_xprt(xprt, task); } @@ -848,6 +849,14 @@ static void xs_tcp_close(struct rpc_xprt *xprt) xs_tcp_shutdown(xprt); } +static void xs_local_destroy(struct rpc_xprt *xprt) +{ + xs_close(xprt); + xs_free_peer_addresses(xprt); + xprt_free(xprt); + module_put(THIS_MODULE); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -861,10 +870,7 @@ static void xs_destroy(struct rpc_xprt *xprt) cancel_delayed_work_sync(&transport->connect_worker); - xs_close(xprt); - xs_free_peer_addresses(xprt); - xprt_free(xprt); - module_put(THIS_MODULE); + xs_local_destroy(xprt); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -1005,7 +1011,7 @@ static void 
xs_udp_data_ready(struct sock *sk, int len) UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS); - xprt_adjust_cwnd(task, copied); + xprt_adjust_cwnd(xprt, task, copied); xprt_complete_rqst(task, copied); out_unlock: @@ -1646,9 +1652,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t * * Adjust the congestion window after a retransmit timeout has occurred. */ -static void xs_udp_timer(struct rpc_task *task) +static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) { - xprt_adjust_cwnd(task, -ETIMEDOUT); + xprt_adjust_cwnd(xprt, task, -ETIMEDOUT); } static unsigned short xs_get_random_port(void) @@ -1731,7 +1737,9 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) */ static void xs_local_rpcbind(struct rpc_task *task) { - xprt_set_bound(task->tk_xprt); + rcu_read_lock(); + xprt_set_bound(rcu_dereference(task->tk_client->cl_xprt)); + rcu_read_unlock(); } static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) @@ -1865,13 +1873,9 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, * @xprt: RPC transport to connect * @transport: socket transport to connect * @create_sock: function to create a socket of the correct type - * - * Invoked by a work queue tasklet. 
*/ -static void xs_local_setup_socket(struct work_struct *work) +static int xs_local_setup_socket(struct sock_xprt *transport) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; struct socket *sock; int status = -EIO; @@ -1916,6 +1920,30 @@ out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); current->flags &= ~PF_FSTRANS; + return status; +} + +static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret; + + if (RPC_IS_ASYNC(task)) { + /* + * We want the AF_LOCAL connect to be resolved in the + * filesystem namespace of the process making the rpc + * call. Thus we connect synchronously. + * + * If we want to support asynchronous AF_LOCAL calls, + * we'll need to figure out how to pass a namespace to + * connect. + */ + rpc_exit(task, -ENOTCONN); + return; + } + ret = xs_local_setup_socket(transport); + if (ret && !RPC_IS_SOFTCONN(task)) + msleep_interruptible(15000); } #ifdef CONFIG_SUNRPC_SWAP @@ -2205,6 +2233,7 @@ out: /** * xs_connect - connect a socket to a remote endpoint + * @xprt: pointer to transport structure * @task: address of RPC task that manages state of connect request * * TCP: If the remote end dropped the connection, delay reconnecting. @@ -2216,9 +2245,8 @@ out: * If a UDP socket connect fails, the delay behavior here prevents * retry floods (hard mounts). 
*/ -static void xs_connect(struct rpc_task *task) +static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { @@ -2453,13 +2481,13 @@ static struct rpc_xprt_ops xs_local_ops = { .alloc_slot = xprt_alloc_slot, .rpcbind = xs_local_rpcbind, .set_port = xs_local_set_port, - .connect = xs_connect, + .connect = xs_local_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_local_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, - .destroy = xs_destroy, + .destroy = xs_local_destroy, .print_stats = xs_local_print_stats, }; @@ -2626,8 +2654,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) goto out_err; } xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_local_setup_socket); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); break; default: diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index bc41bd3..4f99600 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -3,8 +3,8 @@ # menuconfig TIPC - tristate "The TIPC Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "The TIPC Protocol" + depends on INET ---help--- The Transparent Inter Process Communication (TIPC) protocol is specially designed for intra cluster communication. 
This protocol diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 54f89f9..2655c9f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -774,6 +774,7 @@ void tipc_bclink_init(void) bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); + spin_lock_init(&bcbearer->bearer.lock); bcl->b_ptr = &bcbearer->bearer; bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 4675477..24b1679 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -473,11 +473,10 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, static struct name_seq *nametbl_find_seq(u32 type) { struct hlist_head *seq_head; - struct hlist_node *seq_node; struct name_seq *ns; seq_head = &table.types[hash(type)]; - hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { + hlist_for_each_entry(ns, seq_head, ns_list) { if (ns->type == type) return ns; } @@ -853,7 +852,6 @@ static int nametbl_list(char *buf, int len, u32 depth_info, u32 type, u32 lowbound, u32 upbound) { struct hlist_head *seq_head; - struct hlist_node *seq_node; struct name_seq *seq; int all_types; int ret = 0; @@ -873,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, upbound = ~0; for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { + hlist_for_each_entry(seq, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len - ret, depth, seq->type, lowbound, upbound, i); @@ -889,7 +887,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, ret += nametbl_header(buf + ret, len - ret, depth); i = hash(type); seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { + hlist_for_each_entry(seq, seq_head, ns_list) { if (seq->type == type) { ret += nameseq_list(seq, buf + ret, len - ret, depth, type, diff --git a/net/tipc/node.c 
b/net/tipc/node.c index 48f39dd..6e6c434 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -69,12 +69,11 @@ static unsigned int tipc_hashfn(u32 addr) struct tipc_node *tipc_node_find(u32 addr) { struct tipc_node *node; - struct hlist_node *pos; if (unlikely(!in_own_cluster_exact(addr))) return NULL; - hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) { + hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) return node; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9b4e483..515ce38 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -43,7 +43,8 @@ #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ -#define OVERLOAD_LIMIT_BASE 10000 +#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ struct tipc_sock { @@ -129,19 +130,6 @@ static void advance_rx_queue(struct sock *sk) } /** - * discard_rx_queue - discard all buffers in socket receive queue - * - * Caller must hold socket lock - */ -static void discard_rx_queue(struct sock *sk) -{ - struct sk_buff *buf; - - while ((buf = __skb_dequeue(&sk->sk_receive_queue))) - kfree_skb(buf); -} - -/** * reject_rx_queue - reject all buffers in socket receive queue * * Caller must hold socket lock @@ -215,7 +203,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; - sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tipc_sk(sk)->p = tp_ptr; @@ -292,7 +279,7 @@ static int release(struct socket *sock) res = tipc_deleteport(tport->ref); /* Discard any remaining (connection-based) messages in receive queue */ - discard_rx_queue(sk); + __skb_queue_purge(&sk->sk_receive_queue); /* Reject any messages that 
accumulated in backlog queue */ sock->state = SS_DISCONNECTING; @@ -516,8 +503,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, if (unlikely((m->msg_namelen < sizeof(*dest)) || (dest->family != AF_TIPC))) return -EINVAL; - if ((total_len > TIPC_MAX_USER_MSG_SIZE) || - (m->msg_iovlen > (unsigned int)INT_MAX)) + if (total_len > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) @@ -625,8 +611,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, if (unlikely(dest)) return send_msg(iocb, sock, m, total_len); - if ((total_len > TIPC_MAX_USER_MSG_SIZE) || - (m->msg_iovlen > (unsigned int)INT_MAX)) + if (total_len > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) @@ -711,8 +696,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, goto exit; } - if ((total_len > (unsigned int)INT_MAX) || - (m->msg_iovlen > (unsigned int)INT_MAX)) { + if (total_len > (unsigned int)INT_MAX) { res = -EMSGSIZE; goto exit; } @@ -806,6 +790,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) if (addr) { addr->family = AF_TIPC; addr->addrtype = TIPC_ADDR_ID; + memset(&addr->addr, 0, sizeof(addr->addr)); addr->addr.id.ref = msg_origport(msg); addr->addr.id.node = msg_orignode(msg); addr->addr.name.domain = 0; /* could leave uninitialized */ @@ -920,6 +905,9 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1029,6 +1017,9 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); @@ -1155,34 +1146,6 @@ static void tipc_data_ready(struct sock *sk, int len) } /** - * rx_queue_full - determine if receive queue can accept another message - * @msg: message to be 
added to queue - * @queue_size: current size of queue - * @base: nominal maximum size of queue - * - * Returns 1 if queue is unable to accept message, 0 otherwise - */ -static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base) -{ - u32 threshold; - u32 imp = msg_importance(msg); - - if (imp == TIPC_LOW_IMPORTANCE) - threshold = base; - else if (imp == TIPC_MEDIUM_IMPORTANCE) - threshold = base * 2; - else if (imp == TIPC_HIGH_IMPORTANCE) - threshold = base * 100; - else - return 0; - - if (msg_connected(msg)) - threshold *= 4; - - return queue_size >= threshold; -} - -/** * filter_connect - Handle all incoming messages for a connection-based socket * @tsock: TIPC socket * @msg: message @@ -1260,6 +1223,36 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) } /** + * rcvbuf_limit - get proper overload limit of socket receive queue + * @sk: socket + * @buf: message + * + * For all connection oriented messages, irrespective of importance, + * the default overload value (i.e. 67MB) is set as limit. 
+ * + * For all connectionless messages, by default new queue limits are + * as follows: + * + * TIPC_LOW_IMPORTANCE (5MB) + * TIPC_MEDIUM_IMPORTANCE (10MB) + * TIPC_HIGH_IMPORTANCE (20MB) + * TIPC_CRITICAL_IMPORTANCE (40MB) + * + * Returns overload limit according to corresponding message importance + */ +static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + unsigned int limit; + + if (msg_connected(msg)) + limit = CONN_OVERLOAD_LIMIT; + else + limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); + return limit; +} + +/** * filter_rcv - validate incoming message * @sk: socket * @buf: message @@ -1275,7 +1268,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) { struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(buf); - u32 recv_q_len; + unsigned int limit = rcvbuf_limit(sk, buf); u32 res = TIPC_OK; /* Reject message if it is wrong sort of message for socket */ @@ -1292,15 +1285,13 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) } /* Reject message if there isn't room to queue it */ - recv_q_len = skb_queue_len(&sk->sk_receive_queue); - if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) { - if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2)) - return TIPC_ERR_OVERLOAD; - } + if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) + return TIPC_ERR_OVERLOAD; - /* Enqueue message (finally!) 
*/ + /* Enqueue message */ TIPC_SKB_CB(buf)->handle = 0; __skb_queue_tail(&sk->sk_receive_queue, buf); + skb_set_owner_r(buf, sk); sk->sk_data_ready(sk, 0); return TIPC_OK; @@ -1349,7 +1340,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - if (sk_add_backlog(sk, buf, sk->sk_rcvbuf)) + if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf))) res = TIPC_ERR_OVERLOAD; else res = TIPC_OK; @@ -1583,6 +1574,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); + skb_set_owner_r(buf, new_sk); } release_sock(new_sk); @@ -1637,7 +1629,7 @@ restart: case SS_DISCONNECTING: /* Discard any unreceived messages */ - discard_rx_queue(sk); + __skb_queue_purge(&sk->sk_receive_queue); /* Wake up anyone sleeping in poll */ sk->sk_state_change(sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5b5c876..2db702d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -263,9 +263,8 @@ static struct sock *__unix_find_socket_byname(struct net *net, int len, int type, unsigned int hash) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &unix_socket_table[hash ^ type]) { + sk_for_each(s, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); if (!net_eq(sock_net(s), net)) @@ -298,10 +297,9 @@ static inline struct sock *unix_find_socket_byname(struct net *net, static struct sock *unix_find_socket_byinode(struct inode *i) { struct sock *s; - struct hlist_node *node; spin_lock(&unix_table_lock); - sk_for_each(s, node, + sk_for_each(s, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; @@ -384,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct 
unix_sock *u = unix_sk(sk); struct path path; @@ -453,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -701,9 +697,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) @@ -1996,7 +1993,7 @@ again: if ((UNIXCB(skb).pid != siocb->scm->pid) || (UNIXCB(skb).cred != siocb->scm->cred)) break; - } else { + } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); check_creds = 1; @@ -2402,7 +2399,7 @@ static int __net_init unix_net_init(struct net *net) goto out; #ifdef CONFIG_PROC_FS - if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) { + if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) { unix_sysctl_unregister(net); goto out; } @@ -2415,7 +2412,7 @@ out: static void __net_exit unix_net_exit(struct net *net) { unix_sysctl_unregister(net); - proc_net_remove(net, "unix"); + remove_proc_entry("unix", net->proc_net); } static struct pernet_operations unix_net_ops = { @@ -2426,9 +2423,8 @@ static struct pernet_operations unix_net_ops = { static int __init af_unix_init(void) { int rc = -1; - struct sk_buff *dummy_skb; - BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); rc = proto_register(&unix_proto, 1); if (rc != 0) { diff --git a/net/unix/diag.c b/net/unix/diag.c index 5ac19dc..d591091 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -192,10 +192,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) slot < ARRAY_SIZE(unix_socket_table); s_num = 0, slot++) { struct sock *sk; - struct hlist_node *node; num = 0; - sk_for_each(sk, node, 
&unix_socket_table[slot]) { + sk_for_each(sk, &unix_socket_table[slot]) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) @@ -226,9 +225,7 @@ static struct sock *unix_lookup_by_ino(int ino) spin_lock(&unix_table_lock); for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { - struct hlist_node *node; - - sk_for_each(sk, node, &unix_socket_table[i]) + sk_for_each(sk, &unix_socket_table[i]) if (ino == sock_i_ino(sk)) { sock_hold(sk); spin_unlock(&unix_table_lock); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index b6f4b99..d0f6545 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -99,7 +99,7 @@ unsigned int unix_tot_inflight; struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); /* * Socket ? diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig new file mode 100644 index 0000000..b5fa7e4 --- /dev/null +++ b/net/vmw_vsock/Kconfig @@ -0,0 +1,28 @@ +# +# Vsock protocol +# + +config VSOCKETS + tristate "Virtual Socket protocol" + help + Virtual Socket Protocol is a socket protocol similar to TCP/IP + allowing communication between Virtual Machines and the hypervisor + or host. + + You should also select one or more hypervisor-specific transports + below. + + To compile this driver as a module, choose M here: the module + will be called vsock. If unsure, say N. + +config VMWARE_VMCI_VSOCKETS + tristate "VMware VMCI transport for Virtual Sockets" + depends on VSOCKETS && VMWARE_VMCI + help + This module implements a VMCI transport for Virtual Sockets. + + Enable this transport if your Virtual Machine runs on a VMware + hypervisor. + + To compile this driver as a module, choose M here: the module + will be called vmw_vsock_vmci_transport. If unsure, say N.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile new file mode 100644 index 0000000..2ce52d7 --- /dev/null +++ b/net/vmw_vsock/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o + +vsock-y += af_vsock.o vsock_addr.o + +vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c new file mode 100644 index 0000000..7f93e2a --- /dev/null +++ b/net/vmw_vsock/af_vsock.c @@ -0,0 +1,2014 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +/* Implementation notes: + * + * - There are two kinds of sockets: those created by user action (such as + * calling socket(2)) and those created by incoming connection request packets. + * + * - There are two "global" tables, one for bound sockets (sockets that have + * specified an address that they are responsible for) and one for connected + * sockets (sockets that have established a connection with another socket). + * These tables are "global" in that all sockets on the system are placed + * within them. - Note, though, that the bound table contains an extra entry + * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in + * that list. 
The bound table is used solely for lookup of sockets when packets + * are received and that's not necessary for SOCK_DGRAM sockets since we create + * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM + * sockets out of the bound hash buckets will reduce the chance of collisions + * when looking for SOCK_STREAM sockets and prevents us from having to check the + * socket type in the hash table lookups. + * + * - Sockets created by user action will either be "client" sockets that + * initiate a connection or "server" sockets that listen for connections; we do + * not support simultaneous connects (two "client" sockets connecting). + * + * - "Server" sockets are referred to as listener sockets throughout this + * implementation because they are in the SS_LISTEN state. When a connection + * request is received (the second kind of socket mentioned above), we create a + * new socket and refer to it as a pending socket. These pending sockets are + * placed on the pending connection list of the listener socket. When future + * packets are received for the address the listener socket is bound to, we + * check if the source of the packet is from one that has an existing pending + * connection. If it does, we process the packet for the pending socket. When + * that socket reaches the connected state, it is removed from the listener + * socket's pending list and enqueued in the listener socket's accept queue. + * Callers of accept(2) will accept connected sockets from the listener socket's + * accept queue. If the socket cannot be accepted for some reason then it is + * marked rejected. Once the connection is accepted, it is owned by the user + * process and the responsibility for cleanup falls with that user process. + * + * - It is possible that these pending sockets will never reach the connected + * state; in fact, we may never receive another packet after the connection + * request. 
Because of this, we must schedule a cleanup function to run in the + * future, after some amount of time passes where a connection should have been + * established. This function ensures that the socket is off all lists so it + * cannot be retrieved, then drops all references to the socket so it is cleaned + * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this + * function will also cleanup rejected sockets, those that reach the connected + * state but leave it before they have been accepted. + * + * - Sockets created by user action will be cleaned up when the user process + * calls close(2), causing our release implementation to be called. Our release + * implementation will perform some cleanup then drop the last reference so our + * sk_destruct implementation is invoked. Our sk_destruct implementation will + * perform additional cleanup that's common for both types of sockets. + * + * - A socket's reference count is what ensures that the structure won't be + * freed. Each entry in a list (such as the "global" bound and connected tables + * and the listener socket's pending list and connected queue) ensures a + * reference. When we defer work until process context and pass a socket as our + * argument, we must ensure the reference count is increased to ensure the + * socket isn't freed before the function is run; the deferred function will + * then drop the reference. 
+ */ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/cred.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/skbuff.h> +#include <linux/smp.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <net/sock.h> + +#include "af_vsock.h" + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); +static void vsock_sk_destruct(struct sock *sk); +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* Protocol family. */ +static struct proto vsock_proto = { + .name = "AF_VSOCK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct vsock_sock), +}; + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +static const struct vsock_transport *transport; +static DEFINE_MUTEX(vsock_register_mutex); + +/**** EXPORTS ****/ + +/* Get the ID of the local context. This is transport dependent. */ + +int vm_sockets_get_local_cid(void) +{ + return transport->get_local_cid(); +} +EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); + +/**** UTILS ****/ + +/* Each bound VSocket is stored in the bind hash table and each connected + * VSocket is stored in the connected hash table. + * + * Unbound sockets are all put on the same list attached to the end of the hash + * table (vsock_unbound_sockets). Bound sockets are added to the hash table in + * the bucket that their local address hashes to (vsock_bound_sockets(addr) + * represents the list that addr hashes to). 
+ * + * Specifically, we initialize the vsock_bind_table array to a size of + * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through + * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and + * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function + * mods with VSOCK_HASH_SIZE so that every bound bucket can be selected and the + * unbound bucket never is. + */ +#define VSOCK_HASH_SIZE 251 +#define MAX_PORT_RETRIES 24 + +#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) +#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) +#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) + +/* XXX This can probably be implemented in a better way. */ +#define VSOCK_CONN_HASH(src, dst) \ + (((src)->svm_cid ^ (dst)->svm_port) % VSOCK_HASH_SIZE) +#define vsock_connected_sockets(src, dst) \ + (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) +#define vsock_connected_sockets_vsk(vsk) \ + vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) + +static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +static DEFINE_SPINLOCK(vsock_table_lock); + +static __init void vsock_init_tables(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++) + INIT_LIST_HEAD(&vsock_bind_table[i]); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) + INIT_LIST_HEAD(&vsock_connected_table[i]); +} + +static void __vsock_insert_bound(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->bound_table, list); +} + +static void __vsock_insert_connected(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->connected_table, list); +} + +static void __vsock_remove_bound(struct vsock_sock *vsk) +{ + list_del_init(&vsk->bound_table); + sock_put(&vsk->sk); +} + +static void __vsock_remove_connected(struct vsock_sock *vsk) +{ + list_del_init(&vsk->connected_table); + sock_put(&vsk->sk); +} + +static
struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) + if (addr->svm_port == vsk->local_addr.svm_port) + return sk_vsock(vsk); + + return NULL; +} + +static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_connected_sockets(src, dst), + connected_table) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) && + dst->svm_port == vsk->local_addr.svm_port) { + return sk_vsock(vsk); + } + } + + return NULL; +} + +static bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +static bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + +static void vsock_insert_unbound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_insert_bound(vsock_unbound_sockets, vsk); + spin_unlock_bh(&vsock_table_lock); +} + +void vsock_insert_connected(struct vsock_sock *vsk) +{ + struct list_head *list = vsock_connected_sockets( + &vsk->remote_addr, &vsk->local_addr); + + spin_lock_bh(&vsock_table_lock); + __vsock_insert_connected(list, vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_insert_connected); + +void vsock_remove_bound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_bound(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_bound); + +void vsock_remove_connected(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_connected(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_connected); + +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_bound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + 
return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_bound_socket); + +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_connected_socket(src, dst); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_connected_socket); + +static bool vsock_in_bound_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_bound_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +static bool vsock_in_connected_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_connected_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +/* Invoke fn on every socket in the connected hash table. The walk runs with + * vsock_table_lock held and bottom halves disabled, so fn is called under + * that spinlock. + */ +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +{ + int i; + + spin_lock_bh(&vsock_table_lock); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { + struct vsock_sock *vsk; + + /* No semicolon after the iterator: fn() is the loop body. */ + list_for_each_entry(vsk, &vsock_connected_table[i], + connected_table) + fn(sk_vsock(vsk)); + } + + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket); + +void vsock_add_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + + vlistener = vsock_sk(listener); + vpending = vsock_sk(pending); + + sock_hold(pending); + sock_hold(listener); + list_add_tail(&vpending->pending_links, &vlistener->pending_links); +} +EXPORT_SYMBOL_GPL(vsock_add_pending); + +void vsock_remove_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vpending = vsock_sk(pending); + + list_del_init(&vpending->pending_links); + sock_put(listener); + sock_put(pending); +} +EXPORT_SYMBOL_GPL(vsock_remove_pending); + +void vsock_enqueue_accept(struct sock *listener, struct sock *connected) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener =
vsock_sk(listener); + vconnected = vsock_sk(connected); + + sock_hold(connected); + sock_hold(listener); + list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue); +} +EXPORT_SYMBOL_GPL(vsock_enqueue_accept); + +static struct sock *vsock_dequeue_accept(struct sock *listener) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + + if (list_empty(&vlistener->accept_queue)) + return NULL; + + vconnected = list_entry(vlistener->accept_queue.next, + struct vsock_sock, accept_queue); + + list_del_init(&vconnected->accept_queue); + sock_put(listener); + /* The caller will need a reference on the connected socket so we let + * it call sock_put(). + */ + + return sk_vsock(vconnected); +} + +static bool vsock_is_accept_queue_empty(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return list_empty(&vsk->accept_queue); +} + +static bool vsock_is_pending(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return !list_empty(&vsk->pending_links); +} + +static int vsock_send_shutdown(struct sock *sk, int mode) +{ + return transport->shutdown(vsock_sk(sk), mode); +} + +void vsock_pending_work(struct work_struct *work) +{ + struct sock *sk; + struct sock *listener; + struct vsock_sock *vsk; + bool cleanup; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + listener = vsk->listener; + cleanup = true; + + lock_sock(listener); + lock_sock(sk); + + if (vsock_is_pending(sk)) { + vsock_remove_pending(listener, sk); + } else if (!vsk->rejected) { + /* We are not on the pending list and accept() did not reject + * us, so we must have been accepted by our user process. We + * just need to drop our references to the sockets and be on + * our way. 
+ */ + cleanup = false; + goto out; + } + + listener->sk_ack_backlog--; + + /* We need to remove ourself from the global connected sockets list so + * incoming packets can't find this socket, and to reduce the reference + * count. + */ + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + sk->sk_state = SS_FREE; + +out: + release_sock(sk); + release_sock(listener); + if (cleanup) + sock_put(sk); + + sock_put(sk); + sock_put(listener); +} +EXPORT_SYMBOL_GPL(vsock_pending_work); + +/**** SOCKET OPERATIONS ****/ + +static int __vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_bound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_bound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + /* Remove stream sockets from the unbound list and add them to the hash + * table for easy lookup by its address. The unbound list is simply an + * extra entry at the end of the hash table, a trick used by AF_UNIX. 
+ */ + __vsock_remove_bound(vsk); + __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); + + return 0; +} + +static int __vsock_bind_dgram(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return transport->dgram_bind(vsk, addr); +} + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk = vsock_sk(sk); + u32 cid; + int retval; + + /* First ensure this socket isn't already bound. */ + if (vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + /* Now bind to the provided address or select appropriate values if + * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that + * like AF_INET prevents binding to a non-local IP address (in most + * cases), we only allow binding to the local CID. + */ + cid = transport->get_local_cid(); + if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY) + return -EADDRNOTAVAIL; + + switch (sk->sk_socket->type) { + case SOCK_STREAM: + spin_lock_bh(&vsock_table_lock); + retval = __vsock_bind_stream(vsk, addr); + spin_unlock_bh(&vsock_table_lock); + break; + + case SOCK_DGRAM: + retval = __vsock_bind_dgram(vsk, addr); + break; + + default: + retval = -EINVAL; + break; + } + + return retval; +} + +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, + unsigned short type) +{ + struct sock *sk; + struct vsock_sock *psk; + struct vsock_sock *vsk; + + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + + /* sk->sk_type is normally set in sock_init_data, but only if sock is + * non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. 
+ */ + if (!sock) + sk->sk_type = type; + + vsk = vsock_sk(sk); + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + sk->sk_destruct = vsock_sk_destruct; + sk->sk_backlog_rcv = vsock_queue_rcv_skb; + sk->sk_state = 0; + sock_reset_flag(sk, SOCK_DONE); + + INIT_LIST_HEAD(&vsk->bound_table); + INIT_LIST_HEAD(&vsk->connected_table); + vsk->listener = NULL; + INIT_LIST_HEAD(&vsk->pending_links); + INIT_LIST_HEAD(&vsk->accept_queue); + vsk->rejected = false; + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + vsk->peer_shutdown = 0; + + psk = parent ? vsock_sk(parent) : NULL; + if (parent) { + vsk->trusted = psk->trusted; + vsk->owner = get_cred(psk->owner); + vsk->connect_timeout = psk->connect_timeout; + } else { + vsk->trusted = capable(CAP_NET_ADMIN); + vsk->owner = get_current_cred(); + vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; + } + + if (transport->init(vsk, psk) < 0) { + sk_free(sk); + return NULL; + } + + if (sock) + vsock_insert_unbound(vsk); + + return sk; +} +EXPORT_SYMBOL_GPL(__vsock_create); + +static void __vsock_release(struct sock *sk) +{ + if (sk) { + struct sk_buff *skb; + struct sock *pending; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ + + if (vsock_in_bound_table(vsk)) + vsock_remove_bound(vsk); + + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + transport->release(vsk); + + lock_sock(sk); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) + kfree_skb(skb); + + /* Clean up any sockets that never were accepted. 
*/ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending); + sock_put(pending); + } + + release_sock(sk); + sock_put(sk); + } +} + +static void vsock_sk_destruct(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + transport->destruct(vsk); + + /* When clearing these addresses, there's no need to set the family and + * possibly register the address family with the kernel. + */ + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + put_cred(vsk->owner); +} + +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sock_queue_rcv_skb(sk, skb); + if (err) + kfree_skb(skb); + + return err; +} + +s64 vsock_stream_has_data(struct vsock_sock *vsk) +{ + return transport->stream_has_data(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_data); + +s64 vsock_stream_has_space(struct vsock_sock *vsk) +{ + return transport->stream_has_space(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_space); + +static int vsock_release(struct socket *sock) +{ + __vsock_release(sock->sk); + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + +static int +vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + int err; + struct sock *sk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + + if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) + return -EINVAL; + + lock_sock(sk); + err = __vsock_bind(sk, vm_addr); + release_sock(sk); + + return err; +} + +static int vsock_getname(struct socket *sock, + struct sockaddr *addr, int *addr_len, int peer) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (peer) { + if (sock->state != SS_CONNECTED) { + err = -ENOTCONN; + goto out; + } + vm_addr = &vsk->remote_addr; + } else { + vm_addr = &vsk->local_addr; + } + + if (!vm_addr) { + err = -EINVAL; + goto 
out; + } + + /* sys_getsockname() and sys_getpeername() pass us a + * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately + * that macro is defined in socket.c instead of .h, so we hardcode its + * value here. + */ + BUILD_BUG_ON(sizeof(*vm_addr) > 128); + memcpy(addr, vm_addr, sizeof(*vm_addr)); + *addr_len = sizeof(*vm_addr); + +out: + release_sock(sk); + return err; +} + +static int vsock_shutdown(struct socket *sock, int mode) +{ + int err; + struct sock *sk; + + /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses + * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode + * here like the other address families do. Note also that the + * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), + * which is what we want. + */ + mode++; + + if ((mode & ~SHUTDOWN_MASK) || !mode) + return -EINVAL; + + /* If this is a STREAM socket and it is not connected then bail out + * immediately. If it is a DGRAM socket then we must first kick the + * socket so that it wakes up from any sleeping calls, for example + * recv(), and then afterwards return the error. + */ + + sk = sock->sk; + if (sock->state == SS_UNCONNECTED) { + err = -ENOTCONN; + if (sk->sk_type == SOCK_STREAM) + return err; + } else { + sock->state = SS_DISCONNECTING; + err = 0; + } + + /* Receive and send shutdowns are treated alike. */ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { + lock_sock(sk); + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); + release_sock(sk); + + if (sk->sk_type == SOCK_STREAM) { + sock_reset_flag(sk, SOCK_DONE); + vsock_send_shutdown(sk, mode); + } + } + + return err; +} + +static unsigned int vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk; + unsigned int mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (sk->sk_err) + /* Signify that there has been an error on this socket. 
*/ + mask |= POLLERR; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of POLLHUP set. + */ + if ((sk->sk_shutdown == SHUTDOWN_MASK) || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (vsk->peer_shutdown & SEND_SHUTDOWN))) { + mask |= POLLHUP; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLRDHUP; + } + + if (sock->type == SOCK_DGRAM) { + /* For datagram sockets we can read if there is something in + * the queue and write as long as the socket isn't shutdown for + * sending. + */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= POLLIN | POLLRDNORM; + } + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + } else if (sock->type == SOCK_STREAM) { + lock_sock(sk); + + /* Listening sockets that have connections in their accept + * queue can be read. + */ + if (sk->sk_state == SS_LISTEN + && !vsock_is_accept_queue_empty(sk)) + mask |= POLLIN | POLLRDNORM; + + /* If there is something in the queue then we can read. */ + if (transport->stream_is_active(vsk) && + !(sk->sk_shutdown & RCV_SHUTDOWN)) { + bool data_ready_now = false; + int ret = transport->notify_poll_in( + vsk, 1, &data_ready_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (data_ready_now) + mask |= POLLIN | POLLRDNORM; + + } + } + + /* Sockets whose connections have been closed, reset, or + * terminated should also be considered read, and we check the + * shutdown flag for that. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLIN | POLLRDNORM; + } + + /* Connected sockets that can produce data can be written. 
*/ + if (sk->sk_state == SS_CONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + bool space_avail_now = false; + int ret = transport->notify_poll_out( + vsk, 1, &space_avail_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (space_avail_now) + /* Remove POLLWRBAND since INET + * sockets are not setting it. + */ + mask |= POLLOUT | POLLWRNORM; + + } + } + } + + /* Simulate INET socket poll behaviors, which sets + * POLLOUT|POLLWRNORM when peer is closed and nothing to read, + * but local send is not shutdown. + */ + if (sk->sk_state == SS_UNCONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM; + + } + + release_sock(sk); + } + + return mask; +} + +static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + /* For now, MSG_DONTWAIT is always assumed... */ + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + /* If the provided message contains an address, use that. Otherwise + * fall back on the socket's remote handle (if it has been connected). + */ + if (msg->msg_name && + vsock_addr_cast(msg->msg_name, msg->msg_namelen, + &remote_addr) == 0) { + /* Ensure this address is of the right type and is a valid + * destination. 
+ */ + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + if (!vsock_addr_bound(remote_addr)) { + err = -EINVAL; + goto out; + } + } else if (sock->state == SS_CONNECTED) { + remote_addr = &vsk->remote_addr; + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + /* XXX Should connect() or this function ensure remote_addr is + * bound? + */ + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EINVAL; + goto out; + } + } else { + err = -EINVAL; + goto out; + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len); + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_connect(struct socket *sock, + struct sockaddr *addr, int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + + err = vsock_addr_cast(addr, addr_len, &remote_addr); + if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { + lock_sock(sk); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + sock->state = SS_UNCONNECTED; + release_sock(sk); + return 0; + } else if (err != 0) + return -EINVAL; + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); + sock->state = SS_CONNECTED; + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + return 
transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, + flags); +} + +static const struct proto_ops vsock_dgram_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = vsock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = vsock_dgram_sendmsg, + .recvmsg = vsock_dgram_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static void vsock_connect_timeout(struct work_struct *work) +{ + struct sock *sk; + struct vsock_sock *vsk; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + + lock_sock(sk); + if (sk->sk_state == SS_CONNECTING && + (sk->sk_shutdown != SHUTDOWN_MASK)) { + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + } + release_sock(sk); + + sock_put(sk); +} + +static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ + switch (sock->state) { + case SS_CONNECTED: + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + err = -EINVAL; + goto out; + case SS_CONNECTING: + /* This continues on so we can move sock into the SS_CONNECTED + * state once the connection has completed (at which point err + * will be set to zero also). Otherwise, we will either wait + * for the connection or return -EALREADY should this be a + * non-blocking call. 
+ */ + err = -EALREADY; + break; + default: + if ((sk->sk_state == SS_LISTEN) || + vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { + err = -EINVAL; + goto out; + } + + /* The hypervisor and well-known contexts do not have socket + * endpoints. + */ + if (!transport->stream_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -ENETUNREACH; + goto out; + } + + /* Set the remote address that we are connecting to. */ + memcpy(&vsk->remote_addr, remote_addr, + sizeof(vsk->remote_addr)); + + /* Autobind this socket to the local address if necessary. */ + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + sk->sk_state = SS_CONNECTING; + + err = transport->connect(vsk); + if (err < 0) + goto out; + + /* Mark sock as connecting and set the error code to in + * progress in case this is a non-blocking connect. + */ + sock->state = SS_CONNECTING; + err = -EINPROGRESS; + } + + /* The receive path will handle all communication until we are able to + * enter the connected state. Here we wait for the connection to be + * completed or a notification of an error. + */ + timeout = vsk->connect_timeout; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + if (flags & O_NONBLOCK) { + /* If we're not going to block, we schedule a timeout + * function to generate a timeout on the connection + * attempt, in case the peer doesn't respond in a + * timely manner. We hold on to the socket until the + * timeout fires. + */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->dwork, + vsock_connect_timeout); + schedule_delayed_work(&vsk->dwork, timeout); + + /* Skip ahead to preserve error code set above. 
*/ + goto out_wait; + } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait_error; + } else if (timeout == 0) { + err = -ETIMEDOUT; + goto out_wait_error; + } + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + } + + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait_error; + } else + err = 0; + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; + +out_wait_error: + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; +} + +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *listener; + int err; + struct sock *connected; + struct vsock_sock *vconnected; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + listener = sock->sk; + + lock_sock(listener); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (listener->sk_state != SS_LISTEN) { + err = -EINVAL; + goto out; + } + + /* Wait for children sockets to appear; these are the new sockets + * created upon connection establishment. + */ + timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + + while ((connected = vsock_dequeue_accept(listener)) == NULL && + listener->sk_err == 0) { + release_sock(listener); + timeout = schedule_timeout(timeout); + lock_sock(listener); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + } + + if (listener->sk_err) + err = -listener->sk_err; + + if (connected) { + listener->sk_ack_backlog--; + + lock_sock(connected); + vconnected = vsock_sk(connected); + + /* If the listener socket has received an error, then we should + * reject this socket and return. 
Note that we simply mark the + * socket rejected, drop our reference, and let the cleanup + * function handle the cleanup; the fact that we found it in + * the listener's accept queue guarantees that the cleanup + * function hasn't run yet. + */ + if (err) { + vconnected->rejected = true; + release_sock(connected); + sock_put(connected); + goto out_wait; + } + + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); + release_sock(connected); + sock_put(connected); + } + +out_wait: + finish_wait(sk_sleep(listener), &wait); +out: + release_sock(listener); + return err; +} + +static int vsock_listen(struct socket *sock, int backlog) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + + sk = sock->sk; + + lock_sock(sk); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (sock->state != SS_UNCONNECTED) { + err = -EINVAL; + goto out; + } + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + err = -EINVAL; + goto out; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_state = SS_LISTEN; + + err = 0; + +out: + release_sock(sk); + return err; +} + +static int vsock_stream_setsockopt(struct socket *sock, + int level, + int optname, + char __user *optval, + unsigned int optlen) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + +#define COPY_IN(_v) \ + do { \ + if (optlen < sizeof(_v)) { \ + err = -EINVAL; \ + goto exit; \ + } \ + if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + err = -EFAULT; \ + goto exit; \ + } \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + COPY_IN(val); + transport->set_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + COPY_IN(val); + transport->set_max_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + COPY_IN(val); + transport->set_min_buffer_size(vsk, val); + 
break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + COPY_IN(tv); + if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && + tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { + vsk->connect_timeout = tv.tv_sec * HZ + + DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + if (vsk->connect_timeout == 0) + vsk->connect_timeout = + VSOCK_DEFAULT_CONNECT_TIMEOUT; + + } else { + err = -ERANGE; + } + break; + } + + default: + err = -ENOPROTOOPT; + break; + } + +#undef COPY_IN + +exit: + release_sock(sk); + return err; +} + +static int vsock_stream_getsockopt(struct socket *sock, + int level, int optname, + char __user *optval, + int __user *optlen) +{ + int err; + int len; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + + err = get_user(len, optlen); + if (err != 0) + return err; + +#define COPY_OUT(_v) \ + do { \ + if (len < sizeof(_v)) \ + return -EINVAL; \ + \ + len = sizeof(_v); \ + if (copy_to_user(optval, &_v, len) != 0) \ + return -EFAULT; \ + \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + val = transport->get_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + val = transport->get_max_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + val = transport->get_min_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + tv.tv_sec = vsk->connect_timeout / HZ; + tv.tv_usec = + (vsk->connect_timeout - + tv.tv_sec * HZ) * (1000000 / HZ); + COPY_OUT(tv); + break; + } + default: + return -ENOPROTOOPT; + } + + err = put_user(len, optlen); + if (err != 0) + return -EFAULT; + +#undef COPY_OUT + + return 0; +} + +static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk; + struct vsock_sock *vsk; + ssize_t total_written; + long timeout; + int err; + 
struct vsock_transport_send_notify_data send_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + total_written = 0; + err = 0; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + /* Callers should not provide a destination with stream sockets. */ + if (msg->msg_namelen) { + err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + goto out; + } + + /* Send data only if both sides are not shutdown in the direction. */ + if (sk->sk_shutdown & SEND_SHUTDOWN || + vsk->peer_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state != SS_CONNECTED || + !vsock_addr_bound(&vsk->local_addr)) { + err = -ENOTCONN; + goto out; + } + + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EDESTADDRREQ; + goto out; + } + + /* Wait for room in the produce queue to enqueue our user's data. */ + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + err = transport->notify_send_init(vsk, &send_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (total_written < len) { + ssize_t written; + + while (vsock_stream_has_space(vsk) == 0 && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + !(vsk->peer_shutdown & RCV_SHUTDOWN)) { + + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + err = transport->notify_send_pre_block(vsk, &send_data); + if (err < 0) + goto out_wait; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + + /* These checks occur both as part of and after the loop + * conditional since we need to check before and after + * sleeping. 
+ */ + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; + goto out_wait; + } + + err = transport->notify_send_pre_enqueue(vsk, &send_data); + if (err < 0) + goto out_wait; + + /* Note that enqueue will only write as many bytes as are free + * in the produce queue, so we don't need to ensure len is + * smaller than the queue size. It is the caller's + * responsibility to check how many bytes we were able to send. + */ + + written = transport->stream_enqueue( + vsk, msg->msg_iov, + len - total_written); + if (written < 0) { + err = -ENOMEM; + goto out_wait; + } + + total_written += written; + + err = transport->notify_send_post_enqueue( + vsk, written, &send_data); + if (err < 0) + goto out_wait; + + } + +out_wait: + if (total_written > 0) + err = total_written; + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + + +static int +vsock_stream_recvmsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk; + struct vsock_sock *vsk; + int err; + size_t target; + ssize_t copied; + long timeout; + struct vsock_transport_recv_notify_data recv_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + msg->msg_namelen = 0; + + lock_sock(sk); + + if (sk->sk_state != SS_CONNECTED) { + /* Recvmsg is supposed to return 0 if a peer performs an + * orderly shutdown. Differentiate between that case and when a + * peer has not connected or a local shutdown occurred with the + * SOCK_DONE flag. + */ + if (sock_flag(sk, SOCK_DONE)) + err = 0; + else + err = -ENOTCONN; + + goto out; + } + + if (flags & MSG_OOB) { + err = -EOPNOTSUPP; + goto out; + } + + /* We don't check peer_shutdown flag here since peer may actually shut + * down, but there can be data in the queue that a local socket can + * receive. 
+ */ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = 0; + goto out; + } + + /* It is valid on Linux to pass in a zero-length receive buffer. This + * is not an error. We may as well bail out now. + */ + if (!len) { + err = 0; + goto out; + } + + /* We must not copy less than target bytes into the user's buffer + * before returning successfully, so we wait for the consume queue to + * have that much data to consume before dequeueing. Note that this + * makes it impossible to handle cases where target is greater than the + * queue size. + */ + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + if (target >= transport->stream_rcvhiwat(vsk)) { + err = -ENOMEM; + goto out; + } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + copied = 0; + + err = transport->notify_recv_init(vsk, target, &recv_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (1) { + s64 ready = vsock_stream_has_data(vsk); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should be + * changed to a connection reset in a later change. + */ + + err = -ENOMEM; + goto out_wait; + } else if (ready > 0) { + ssize_t read; + + err = transport->notify_recv_pre_dequeue( + vsk, target, &recv_data); + if (err < 0) + break; + + read = transport->stream_dequeue( + vsk, msg->msg_iov, + len - copied, flags); + if (read < 0) { + err = -ENOMEM; + break; + } + + copied += read; + + err = transport->notify_recv_post_dequeue( + vsk, target, read, + !(flags & MSG_PEEK), &recv_data); + if (err < 0) + goto out_wait; + + if (read >= target || flags & MSG_PEEK) + break; + + target -= read; + } else { + if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) + || (vsk->peer_shutdown & SEND_SHUTDOWN)) { + break; + } + /* Don't wait for non-blocking sockets. 
*/ + if (timeout == 0) { + err = -EAGAIN; + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) + break; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + break; + } else if (timeout == 0) { + err = -EAGAIN; + break; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + } + + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; + + if (copied > 0) { + /* We only do these additional bookkeeping/notification steps + * if we actually copied something out of the queue pair + * instead of just peeking ahead. + */ + + if (!(flags & MSG_PEEK)) { + /* If the other side has shutdown for sending and there + * is nothing more to read, then modify the socket + * state. + */ + if (vsk->peer_shutdown & SEND_SHUTDOWN) { + if (vsock_stream_has_data(vsk) <= 0) { + sk->sk_state = SS_UNCONNECTED; + sock_set_flag(sk, SOCK_DONE); + sk->sk_state_change(sk); + } + } + } + err = copied; + } + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + +static const struct proto_ops vsock_stream_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_stream_connect, + .socketpair = sock_no_socketpair, + .accept = vsock_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = vsock_listen, + .shutdown = vsock_shutdown, + .setsockopt = vsock_stream_setsockopt, + .getsockopt = vsock_stream_getsockopt, + .sendmsg = vsock_stream_sendmsg, + .recvmsg = vsock_stream_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int vsock_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + if (!sock) + return -EINVAL; + + if (protocol && protocol != PF_VSOCK) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case 
SOCK_DGRAM: + sock->ops = &vsock_dgram_ops; + break; + case SOCK_STREAM: + sock->ops = &vsock_stream_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sock->state = SS_UNCONNECTED; + + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; +} + +static const struct net_proto_family vsock_family_ops = { + .family = AF_VSOCK, + .create = vsock_create, + .owner = THIS_MODULE, +}; + +static long vsock_dev_do_ioctl(struct file *filp, + unsigned int cmd, void __user *ptr) +{ + u32 __user *p = ptr; + int retval = 0; + + switch (cmd) { + case IOCTL_VM_SOCKETS_GET_LOCAL_CID: + if (put_user(transport->get_local_cid(), p) != 0) + retval = -EFAULT; + break; + + default: + pr_err("Unknown ioctl %d\n", cmd); + retval = -EINVAL; + } + + return retval; +} + +static long vsock_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg); +} + +#ifdef CONFIG_COMPAT +static long vsock_dev_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg)); +} +#endif + +static const struct file_operations vsock_device_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vsock_dev_compat_ioctl, +#endif + .open = nonseekable_open, +}; + +static struct miscdevice vsock_device = { + .name = "vsock", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vsock_device_ops, +}; + +static int __vsock_core_init(void) +{ + int err; + + vsock_init_tables(); + + err = misc_register(&vsock_device); + if (err) { + pr_err("Failed to register misc device\n"); + return -ENOENT; + } + + err = proto_register(&vsock_proto, 1); /* we want our slab */ + if (err) { + pr_err("Cannot register vsock protocol\n"); + goto err_misc_deregister; + } + + err = sock_register(&vsock_family_ops); + if (err) { + pr_err("could not register af_vsock (%d) address family: %d\n", + AF_VSOCK, err); + goto err_unregister_proto; + } 
+ + return 0; + +err_unregister_proto: + proto_unregister(&vsock_proto); +err_misc_deregister: + misc_deregister(&vsock_device); + return err; +} + +int vsock_core_init(const struct vsock_transport *t) +{ + int retval = mutex_lock_interruptible(&vsock_register_mutex); + if (retval) + return retval; + + if (transport) { + retval = -EBUSY; + goto out; + } + + transport = t; + retval = __vsock_core_init(); + if (retval) + transport = NULL; + +out: + mutex_unlock(&vsock_register_mutex); + return retval; +} +EXPORT_SYMBOL_GPL(vsock_core_init); + +void vsock_core_exit(void) +{ + mutex_lock(&vsock_register_mutex); + + misc_deregister(&vsock_device); + sock_unregister(AF_VSOCK); + proto_unregister(&vsock_proto); + + /* We do not want the assignment below re-ordered. */ + mb(); + transport = NULL; + + mutex_unlock(&vsock_register_mutex); +} +EXPORT_SYMBOL_GPL(vsock_core_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Socket Family"); +MODULE_VERSION("1.0.0.0-k"); +MODULE_LICENSE("GPL v2"); diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h new file mode 100644 index 0000000..7d64d36 --- /dev/null +++ b/net/vmw_vsock/af_vsock.h @@ -0,0 +1,175 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef __AF_VSOCK_H__ +#define __AF_VSOCK_H__ + +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/vm_sockets.h> + +#include "vsock_addr.h" + +#define LAST_RESERVED_PORT 1023 + +#define vsock_sk(__sk) ((struct vsock_sock *)__sk) +#define sk_vsock(__vsk) (&(__vsk)->sk) + +struct vsock_sock { + /* sk must be the first member. */ + struct sock sk; + struct sockaddr_vm local_addr; + struct sockaddr_vm remote_addr; + /* Links for the global tables of bound and connected sockets. */ + struct list_head bound_table; + struct list_head connected_table; + /* Accessed without the socket lock held. This means it can never be + * modified outside of socket create or destruct. + */ + bool trusted; + bool cached_peer_allow_dgram; /* Dgram communication allowed to + * cached peer? + */ + u32 cached_peer; /* Context ID of last dgram destination check. */ + const struct cred *owner; + /* Rest are SOCK_STREAM only. */ + long connect_timeout; + /* Listening socket that this came from. */ + struct sock *listener; + /* Used for pending list and accept queue during connection handshake. + * The listening socket is the head for both lists. Sockets created + * for connection requests are placed in the pending list until they + * are connected, at which point they are put in the accept queue list + * so they can be accepted in accept(). If accept() cannot accept the + * connection, it is marked as rejected so the cleanup function knows + * to clean up the socket. + */ + struct list_head pending_links; + struct list_head accept_queue; + bool rejected; + struct delayed_work dwork; + u32 peer_shutdown; + bool sent_request; + bool ignore_connecting_rst; + + /* Private to transport. 
*/ + void *trans; +}; + +s64 vsock_stream_has_data(struct vsock_sock *vsk); +s64 vsock_stream_has_space(struct vsock_sock *vsk); +void vsock_pending_work(struct work_struct *work); +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, unsigned short type); + +/**** TRANSPORT ****/ + +struct vsock_transport_recv_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ + bool notify_on_block; +}; + +struct vsock_transport_send_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ +}; + +struct vsock_transport { + /* Initialize/tear-down socket. */ + int (*init)(struct vsock_sock *, struct vsock_sock *); + void (*destruct)(struct vsock_sock *); + void (*release)(struct vsock_sock *); + + /* Connections. */ + int (*connect)(struct vsock_sock *); + + /* DGRAM. */ + int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); + int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk, + struct msghdr *msg, size_t len, int flags); + int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, + struct iovec *, size_t len); + bool (*dgram_allow)(u32 cid, u32 port); + + /* STREAM. */ + /* TODO: stream_bind() */ + ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *, + size_t len, int flags); + ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *, + size_t len); + s64 (*stream_has_data)(struct vsock_sock *); + s64 (*stream_has_space)(struct vsock_sock *); + u64 (*stream_rcvhiwat)(struct vsock_sock *); + bool (*stream_is_active)(struct vsock_sock *); + bool (*stream_allow)(u32 cid, u32 port); + + /* Notification. 
*/ + int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); + int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); + int (*notify_recv_init)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_block)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, + ssize_t, bool, struct vsock_transport_recv_notify_data *); + int (*notify_send_init)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_block)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_enqueue)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, + struct vsock_transport_send_notify_data *); + + /* Shutdown. */ + int (*shutdown)(struct vsock_sock *, int); + + /* Buffer sizes. */ + void (*set_buffer_size)(struct vsock_sock *, u64); + void (*set_min_buffer_size)(struct vsock_sock *, u64); + void (*set_max_buffer_size)(struct vsock_sock *, u64); + u64 (*get_buffer_size)(struct vsock_sock *); + u64 (*get_min_buffer_size)(struct vsock_sock *); + u64 (*get_max_buffer_size)(struct vsock_sock *); + + /* Addressing. 
*/ + u32 (*get_local_cid)(void); +}; + +/**** CORE ****/ + +int vsock_core_init(const struct vsock_transport *t); +void vsock_core_exit(void); + +/**** UTILS ****/ + +void vsock_release_pending(struct sock *pending); +void vsock_add_pending(struct sock *listener, struct sock *pending); +void vsock_remove_pending(struct sock *listener, struct sock *pending); +void vsock_enqueue_accept(struct sock *listener, struct sock *connected); +void vsock_insert_connected(struct vsock_sock *vsk); +void vsock_remove_bound(struct vsock_sock *vsk); +void vsock_remove_connected(struct vsock_sock *vsk); +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); + +#endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c new file mode 100644 index 0000000..5e04d3d --- /dev/null +++ b/net/vmw_vsock/vmci_transport.c @@ -0,0 +1,2165 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/cred.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/skbuff.h> +#include <linux/smp.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <net/sock.h> + +#include "af_vsock.h" +#include "vmci_transport_notify.h" + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_recv_pkt_work(struct work_struct *work); +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_server( + struct sock *sk, + struct sock *pending, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_invalid( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt); +static bool vmci_transport_old_proto_override(bool *old_pkt_proto); +static u16 vmci_transport_new_proto_supported_versions(void); +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto, + bool old_pkt_proto); + +struct 
vmci_transport_recv_pkt_info { + struct work_struct work; + struct sock *sk; + struct vmci_transport_packet pkt; +}; + +static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, + VMCI_INVALID_ID }; +static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + +static int PROTOCOL_OVERRIDE = -1; + +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144 + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +/* Helper function to convert from a VMCI error code to a VSock error code. */ + +static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) +{ + int err; + + switch (vmci_error) { + case VMCI_ERROR_NO_MEM: + err = ENOMEM; + break; + case VMCI_ERROR_DUPLICATE_ENTRY: + case VMCI_ERROR_ALREADY_EXISTS: + err = EADDRINUSE; + break; + case VMCI_ERROR_NO_ACCESS: + err = EPERM; + break; + case VMCI_ERROR_NO_RESOURCES: + err = ENOBUFS; + break; + case VMCI_ERROR_INVALID_RESOURCE: + err = EHOSTUNREACH; + break; + case VMCI_ERROR_INVALID_ARGS: + default: + err = EINVAL; + } + + return err > 0 ? 
-err : err; +} + +static inline void +vmci_transport_packet_init(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + u8 type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + /* We register the stream control handler as an any cid handle so we + * must always send from a source address of VMADDR_CID_ANY + */ + pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.dst = vmci_make_handle(dst->svm_cid, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); + pkt->version = VMCI_TRANSPORT_PACKET_VERSION; + pkt->type = type; + pkt->src_port = src->svm_port; + pkt->dst_port = dst->svm_port; + memset(&pkt->proto, 0, sizeof(pkt->proto)); + memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + pkt->u.size = size; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + pkt->u.handle = handle; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + case VMCI_TRANSPORT_PACKET_TYPE_READ: + case VMCI_TRANSPORT_PACKET_TYPE_RST: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + pkt->u.mode = mode; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait)); + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + pkt->u.size = size; + pkt->proto = proto; + break; + } +} + +static inline void +vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt, + struct sockaddr_vm *local, + struct sockaddr_vm *remote) +{ + vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port); + vsock_addr_init(remote, 
pkt->dg.src.context, pkt->src_port); +} + +static int +__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle, + bool convert_error) +{ + int err; + + vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait, + proto, handle); + err = vmci_datagram_send(&pkt->dg); + if (convert_error && (err < 0)) + return vmci_transport_error_to_vsock_error(err); + + return err; +} + +static int +vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + struct vmci_transport_packet reply; + struct sockaddr_vm src, dst; + + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) { + return 0; + } else { + vmci_transport_packet_get_addresses(pkt, &src, &dst); + return __vmci_transport_send_control_pkt(&reply, &src, &dst, + type, + size, mode, wait, + VSOCK_PROTO_INVALID, + handle, true); + } +} + +static int +vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + /* Note that it is safe to use a single packet across all CPUs since + * two tasklets of the same type are guaranteed to not ever run + * simultaneously. If that ever changes, or VMCI stops using tasklets, + * we can use per-cpu packets. 
+ */ + static struct vmci_transport_packet pkt; + + return __vmci_transport_send_control_pkt(&pkt, src, dst, type, + size, mode, wait, + VSOCK_PROTO_INVALID, handle, + false); +} + +static int +vmci_transport_send_control_pkt(struct sock *sk, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (!vsock_addr_bound(&vsk->remote_addr)) + return -EINVAL; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, + &vsk->remote_addr, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + +static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_RST, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_reset(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static int 
vmci_transport_send_qp_offer(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0, + 0, NULL, + VSOCK_PROTO_INVALID, handle); +} + +static int vmci_transport_send_attach(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + 0, 0, NULL, VSOCK_PROTO_INVALID, + handle); +} + +static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt) +{ + return vmci_transport_reply_control_pkt_fast( + pkt, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_INVALID, + 0, 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int 
vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + return vmci_transport_send_control_pkt( + &vsk->sk, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + 0, mode, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static struct sock *vmci_transport_get_pending( + struct sock *listener, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sock *pending; + struct sockaddr_vm src; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + + vlistener = vsock_sk(listener); + + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + pkt->dst_port == vpending->local_addr.svm_port) { + pending = sk_vsock(vpending); + sock_hold(pending); + goto found; + } + } + + pending = NULL; +found: + return pending; + +} + +static void vmci_transport_release_pending(struct sock *pending) +{ + sock_put(pending); +} + +/* We allow two kinds of sockets to communicate with a restricted VM: 1) + * trusted sockets 2) sockets from applications running as the same user as the + * VM (this is only true for the host side and only when using hosted products) + */ + +static bool vmci_transport_is_trusted(struct vsock_sock 
*vsock, u32 peer_cid) +{ + return vsock->trusted || + vmci_is_context_owner(peer_cid, vsock->owner->uid); +} + +/* We allow sending datagrams to and receiving datagrams from a restricted VM + * only if it is trusted as described in vmci_transport_is_trusted. + */ + +static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) +{ + if (vsock->cached_peer != peer_cid) { + vsock->cached_peer = peer_cid; + if (!vmci_transport_is_trusted(vsock, peer_cid) && + (vmci_context_get_priv_flags(peer_cid) & + VMCI_PRIVILEGE_FLAG_RESTRICTED)) { + vsock->cached_peer_allow_dgram = false; + } else { + vsock->cached_peer_allow_dgram = true; + } + } + + return vsock->cached_peer_allow_dgram; +} + +static int +vmci_transport_queue_pair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_size, + u64 consume_size, + u32 peer, u32 flags, bool trusted) +{ + int err = 0; + + if (trusted) { + /* Try to allocate our queue pair as trusted. This will only + * work if vsock is running in the host. + */ + + err = vmci_qpair_alloc(qpair, handle, produce_size, + consume_size, + peer, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED); + if (err != VMCI_ERROR_NO_ACCESS) + goto out; + + } + + err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size, + peer, flags, VMCI_NO_PRIVILEGE_FLAGS); +out: + if (err < 0) { + pr_err("Could not attach to queue pair with %d\n", + err); + err = vmci_transport_error_to_vsock_error(err); + } + + return err; +} + +static int +vmci_transport_datagram_create_hnd(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + int err = 0; + + /* Try to allocate our datagram handler as trusted. This will only work + * if vsock is running in the host. 
+ */ + + err = vmci_datagram_create_handle_priv(resource_id, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED, + recv_cb, + client_data, out_handle); + + if (err == VMCI_ERROR_NO_ACCESS) + err = vmci_datagram_create_handle(resource_id, flags, + recv_cb, client_data, + out_handle); + + return err; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context and if it ever needs to + * sleep it should defer that work to a work queue. + */ + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + size_t size; + struct sk_buff *skb; + struct vsock_sock *vsk; + + sk = (struct sock *)data; + + /* This handler is privileged when this module is running on the host. + * We will get datagrams from all endpoints (even VMs that are in a + * restricted context). If we get one from a restricted context then + * the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, dg->src.context)) + return VMCI_ERROR_NO_ACCESS; + + size = VMCI_DG_SIZE(dg); + + /* Attach the packet to the socket's receive queue as an sk_buff. */ + skb = alloc_skb(size, GFP_ATOMIC); + if (skb) { + /* sk_receive_skb() will do a sock_put(), so hold here. 
*/ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); + } + + return VMCI_SUCCESS; +} + +static bool vmci_transport_stream_allow(u32 cid, u32 port) +{ + static const u32 non_socket_contexts[] = { + VMADDR_CID_HYPERVISOR, + VMADDR_CID_RESERVED, + }; + int i; + + BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts)); + + for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) { + if (cid == non_socket_contexts[i]) + return false; + } + + return true; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context but it defers most of + * its work to the packet handling work queue. + */ + +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + struct sockaddr_vm dst; + struct sockaddr_vm src; + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + bool bh_process_pkt; + int err; + + sk = NULL; + err = VMCI_SUCCESS; + bh_process_pkt = false; + + /* Ignore incoming packets from contexts without sockets, or resources + * that aren't vsock implementations. + */ + + if (!vmci_transport_stream_allow(dg->src.context, -1) + || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) + /* Drop datagrams that do not contain full VSock packets. */ + return VMCI_ERROR_INVALID_ARGS; + + pkt = (struct vmci_transport_packet *)dg; + + /* Find the socket that should handle this packet. First we look for a + * connected socket and if there is none we look for a socket bound to + * the destintation address. + */ + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + /* We could not find a socket for this specified + * address. If this packet is a RST, we just drop it. 
+ * If it is another packet, we send a RST. Note that + * we do not send a RST reply to RSTs so that we do not + * continually send RSTs between two endpoints. + * + * Note that since this is a reply, dst is src and src + * is dst. + */ + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NOT_FOUND; + goto out; + } + } + + /* If the received packet type is beyond all types known to this + * implementation, reply with an invalid message. Hopefully this will + * help when implementing backwards compatibility in the future. + */ + if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) { + vmci_transport_send_invalid_bh(&dst, &src); + err = VMCI_ERROR_INVALID_ARGS; + goto out; + } + + /* This handler is privileged when this module is running on the host. + * We will get datagram connect requests from all endpoints (even VMs + * that are in a restricted context). If we get one from a restricted + * context then the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) { + err = VMCI_ERROR_NO_ACCESS; + goto out; + } + + /* We do most everything in a work queue, but let's fast path the + * notification of reads and writes to help data transfer performance. + * We can only do this if there is no process context code executing + * for this socket since that may change the state. + */ + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk)) { + /* The local context ID may be out of date, update it. 
*/ + vsk->local_addr.svm_cid = dst.svm_cid; + + if (sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + } + + bh_unlock_sock(sk); + + if (!bh_process_pkt) { + struct vmci_transport_recv_pkt_info *recv_pkt_info; + + recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC); + if (!recv_pkt_info) { + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NO_MEM; + goto out; + } + + recv_pkt_info->sk = sk; + memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt)); + INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work); + + schedule_work(&recv_pkt_info->work); + /* Clear sk so that the reference count incremented by one of + * the Find functions above is not decremented below. We need + * that reference count for the packet handler we've scheduled + * to run. + */ + sk = NULL; + } + +out: + if (sk) + sock_put(sk); + + return err; +} + +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + + vsk = vsock_sk(sk); + + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. 
+ */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) { + /* XXX This doesn't do anything, but in the future we may want + * to set a flag here to verify the attach really did occur and + * we weren't just sent a datagram claiming it was. + */ + goto out; + } + +out: + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_handle_detach(struct sock *sk) +{ + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + sock_set_flag(sk, SOCK_DONE); + + /* On a detach the peer will not be sending or receiving + * anymore. + */ + vsk->peer_shutdown = SHUTDOWN_MASK; + + /* We should not be sending anymore since the peer won't be + * there to receive, but we can still receive if there is data + * left in our consume queue. + */ + if (vsock_stream_has_data(vsk) <= 0) { + if (sk->sk_state == SS_CONNECTING) { + /* The peer may detach from a queue pair while + * we are still in the connecting state, i.e., + * if the peer VM is killed after attaching to + * a queue pair, but before we complete the + * handshake. In that case, we treat the detach + * event like a reset. + */ + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + return; + } + sk->sk_state = SS_UNCONNECTED; + } + sk->sk_state_change(sk); + } +} + +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + vsk = vsock_sk(sk); + if (vmci_handle_is_invalid(e_payload->handle)) + return; + + /* Same rules for locking as for peer_attach_cb(). */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. 
+ */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) + vmci_transport_handle_detach(sk); + + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_qp_resumed_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + vsock_for_each_connected_socket(vmci_transport_handle_detach); +} + +static void vmci_transport_recv_pkt_work(struct work_struct *work) +{ + struct vmci_transport_recv_pkt_info *recv_pkt_info; + struct vmci_transport_packet *pkt; + struct sock *sk; + + recv_pkt_info = + container_of(work, struct vmci_transport_recv_pkt_info, work); + sk = recv_pkt_info->sk; + pkt = &recv_pkt_info->pkt; + + lock_sock(sk); + + /* The local context ID may be out of date. */ + vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; + + switch (sk->sk_state) { + case SS_LISTEN: + vmci_transport_recv_listen(sk, pkt); + break; + case SS_CONNECTING: + /* Processing of pending connections for servers goes through + * the listening socket, so see vmci_transport_recv_listen() + * for that path. + */ + vmci_transport_recv_connecting_client(sk, pkt); + break; + case SS_CONNECTED: + vmci_transport_recv_connected(sk, pkt); + break; + default: + /* Because this function does not run in the same context as + * vmci_transport_recv_stream_cb it is possible that the + * socket has closed. We need to let the other side know or it + * could be sitting in a connect and hang forever. Send a + * reset to prevent that. + */ + vmci_transport_send_reset(sk, pkt); + goto out; + } + +out: + release_sock(sk); + kfree(recv_pkt_info); + /* Release reference obtained in the stream callback when we fetched + * this socket out of the bound or connected list. 
+ */ + sock_put(sk); +} + +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct sock *pending; + struct vsock_sock *vpending; + int err; + u64 qp_size; + bool old_request = false; + bool old_pkt_proto = false; + + err = 0; + + /* Because we are in the listen state, we could be receiving a packet + * for ourself or any previous connection requests that we received. + * If it's the latter, we try to find a socket in our list of pending + * connections and, if we do, call the appropriate handler for the + * state that that socket is in. Otherwise we try to service the + * connection request. + */ + pending = vmci_transport_get_pending(sk, pkt); + if (pending) { + lock_sock(pending); + + /* The local context ID may be out of date. */ + vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; + + switch (pending->sk_state) { + case SS_CONNECTING: + err = vmci_transport_recv_connecting_server(sk, + pending, + pkt); + break; + default: + vmci_transport_send_reset(pending, pkt); + err = -EINVAL; + } + + if (err < 0) + vsock_remove_pending(sk, pending); + + release_sock(pending); + vmci_transport_release_pending(pending); + + return err; + } + + /* The listen state only accepts connection requests. Reply with a + * reset unless we received a reset. + */ + + if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST || + pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + if (pkt->u.size == 0) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + /* If this socket can't accommodate this connection request, we send a + * reset. Otherwise we create and initialize a child socket and reply + * with a connection negotiation. 
+ */ + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { + vmci_transport_reply_reset(pkt); + return -ECONNREFUSED; + } + + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type); + if (!pending) { + vmci_transport_send_reset(sk, pkt); + return -ENOMEM; + } + + vpending = vsock_sk(pending); + + vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context, + pkt->dst_port); + vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context, + pkt->src_port); + + /* If the proposed size fits within our min/max, accept it. Otherwise + * propose our own size. + */ + if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size && + pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) { + qp_size = pkt->u.size; + } else { + qp_size = vmci_trans(vpending)->queue_pair_size; + } + + /* Figure out if we are using old or new requests based on the + * overrides pkt types sent by our peer. + */ + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_request = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST) + old_request = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2) + old_request = false; + + } + + if (old_request) { + /* Handle a REQUEST (or override) */ + u16 version = VSOCK_PROTO_INVALID; + if (vmci_transport_proto_to_notify_struct( + pending, &version, true)) + err = vmci_transport_send_negotiate(pending, qp_size); + else + err = -EINVAL; + + } else { + /* Handle a REQUEST2 (or override) */ + int proto_int = pkt->proto; + int pos; + u16 active_proto_version = 0; + + /* The list of possible protocols is the intersection of all + * protocols the client supports ... plus all the protocols we + * support. + */ + proto_int &= vmci_transport_new_proto_supported_versions(); + + /* We choose the highest possible protocol version and use that + * one. 
+ */ + pos = fls(proto_int); + if (pos) { + active_proto_version = (1 << (pos - 1)); + if (vmci_transport_proto_to_notify_struct( + pending, &active_proto_version, false)) + err = vmci_transport_send_negotiate2(pending, + qp_size, + active_proto_version); + else + err = -EINVAL; + + } else { + err = -EINVAL; + } + } + + if (err < 0) { + vmci_transport_send_reset(sk, pkt); + sock_put(pending); + err = vmci_transport_error_to_vsock_error(err); + goto out; + } + + vsock_add_pending(sk, pending); + sk->sk_ack_backlog++; + + pending->sk_state = SS_CONNECTING; + vmci_trans(vpending)->produce_size = + vmci_trans(vpending)->consume_size = qp_size; + vmci_trans(vpending)->queue_pair_size = qp_size; + + vmci_trans(vpending)->notify_ops->process_request(pending); + + /* We might never receive another message for this socket and it's not + * connected to any process, so we have to ensure it gets cleaned up + * ourself. Our delayed work function will take care of that. Note + * that we do not ever cancel this function since we have few + * guarantees about its state when calling cancel_delayed_work(). + * Instead we hold a reference on the socket for that function and make + * it capable of handling cases where it needs to do nothing but + * release that reference. 
+ */ + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); + INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); + schedule_delayed_work(&vpending->dwork, HZ); + +out: + return err; +} + +static int +vmci_transport_recv_connecting_server(struct sock *listener, + struct sock *pending, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vpending; + struct vmci_handle handle; + struct vmci_qp *qpair; + bool is_local; + u32 flags; + u32 detach_sub_id; + int err; + int skerr; + + vpending = vsock_sk(pending); + detach_sub_id = VMCI_INVALID_ID; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + if (vmci_handle_is_invalid(pkt->u.handle)) { + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + break; + default: + /* Close and cleanup the connection. */ + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + goto destroy; + } + + /* In order to complete the connection we need to attach to the offered + * queue pair and send an attach notification. We also subscribe to the + * detach event so we know when our peer goes away, and we do that + * before attaching so we don't miss an event. If all this succeeds, + * we update our state and wakeup anything waiting in accept() for a + * connection. + */ + + /* We don't care about attach since we ensure the other side has + * attached by specifying the ATTACH_ONLY flag below. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + pending, &detach_sub_id); + if (err < VMCI_SUCCESS) { + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->detach_sub_id = detach_sub_id; + + /* Now attach to the queue pair the client created. 
*/ + handle = pkt->u.handle; + + /* vpending->local_addr always has a context id so we do not need to + * worry about VMADDR_CID_ANY in this case. + */ + is_local = + vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid; + flags = VMCI_QPFLAG_ATTACH_ONLY; + flags |= is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc( + &qpair, + &handle, + vmci_trans(vpending)->produce_size, + vmci_trans(vpending)->consume_size, + pkt->dg.src.context, + flags, + vmci_transport_is_trusted( + vpending, + vpending->remote_addr.svm_cid)); + if (err < 0) { + vmci_transport_send_reset(pending, pkt); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->qp_handle = handle; + vmci_trans(vpending)->qpair = qpair; + + /* When we send the attach message, we must be ready to handle incoming + * control messages on the newly connected socket. So we move the + * pending socket to the connected state before sending the attach + * message. Otherwise, an incoming packet triggered by the attach being + * received by the peer may be processed concurrently with what happens + * below after sending the attach message, and that incoming packet + * will find the listening socket instead of the (currently) pending + * socket. Note that enqueueing the socket increments the reference + * count, so even if a reset comes before the connection is accepted, + * the socket will be valid until it is removed from the queue. + * + * If we fail sending the attach below, we remove the socket from the + * connected list and move the socket to SS_UNCONNECTED before + * releasing the lock, so a pending slow path processing of an incoming + * packet will not see the socket in the connected state in that case. + */ + pending->sk_state = SS_CONNECTED; + + vsock_insert_connected(vpending); + + /* Notify our peer of our attach. 
*/ + err = vmci_transport_send_attach(pending, handle); + if (err < 0) { + vsock_remove_connected(vpending); + pr_err("Could not send attach\n"); + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + /* We have a connection. Move the now connected socket from the + * listener's pending list to the accept queue so callers of accept() + * can find it. + */ + vsock_remove_pending(listener, pending); + vsock_enqueue_accept(listener, pending); + + /* Callers of accept() will be waiting on the listening socket, not + * the pending socket. + */ + listener->sk_state_change(listener); + + return 0; + +destroy: + pending->sk_err = skerr; + pending->sk_state = SS_UNCONNECTED; + /* As long as we drop our reference, all necessary cleanup will happen + * when the cleanup function drops its reference and our destruct + * implementation is called. Note that since the listen handler will + * remove pending from the pending list upon our failure, the cleanup + * function won't drop the additional reference, which is why we do it + * here. + */ + sock_put(pending); + + return err; +} +/* Client-side handler for a control packet that arrives while sk is still connecting. It acts on ATTACH, NEGOTIATE/NEGOTIATE2, INVALID and RST and treats every other packet type as a protocol error. On the destroy path a reset is sent, sk moves to SS_UNCONNECTED and skerr is reported through sk_err. Returns 0 on success (and for a peer RST), otherwise a negative error code. */ +static int +vmci_transport_recv_connecting_client(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + int err; + int skerr; + + vsk = vsock_sk(sk); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + if (vmci_handle_is_invalid(pkt->u.handle) || + !vmci_handle_is_equal(pkt->u.handle, + vmci_trans(vsk)->qp_handle)) { + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + /* Signify the socket is connected and wakeup the waiter in + * connect(). Also place the socket in the connected table for + * accounting (it can already be found since it's in the bound + * table).
+ */ + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + + break; + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + if (pkt->u.size == 0 + || pkt->dg.src.context != vsk->remote_addr.svm_cid + || pkt->src_port != vsk->remote_addr.svm_port + || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle) + || vmci_trans(vsk)->qpair + || vmci_trans(vsk)->produce_size != 0 + || vmci_trans(vsk)->consume_size != 0 + || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID + || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + skerr = EPROTO; + err = -EINVAL; + + goto destroy; + } + + err = vmci_transport_recv_connecting_client_negotiate(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + err = vmci_transport_recv_connecting_client_invalid(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_RST: + /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to + * continue processing here after they sent an INVALID packet. + * This meant that we got a RST after the INVALID. We ignore a + * RST after an INVALID. The common code doesn't send the RST + * ... so we can hang if an old version of the common code + * fails between getting a REQUEST and sending an OFFER back. + * Not much we can do about it... except hope that it doesn't + * happen. + */ + if (vsk->ignore_connecting_rst) { + vsk->ignore_connecting_rst = false; + } else { + skerr = ECONNRESET; + err = 0; + goto destroy; + } + + break; + default: + /* Close and cleanup the connection. 
*/ + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + return 0; + +destroy: + vmci_transport_send_reset(sk, pkt); + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err; + struct vsock_sock *vsk; + struct vmci_handle handle; + struct vmci_qp *qpair; + u32 attach_sub_id; + u32 detach_sub_id; + bool is_local; + u32 flags; + bool old_proto = true; + bool old_pkt_proto; + u16 version; + + vsk = vsock_sk(sk); + handle = VMCI_INVALID_HANDLE; + attach_sub_id = VMCI_INVALID_ID; + detach_sub_id = VMCI_INVALID_ID; + + /* If we have gotten here then we should be past the point where old + * linux vsock could have sent the bogus rst. + */ + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + + /* Verify that we're OK with the proposed queue pair size */ + if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size || + pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) { + err = -EINVAL; + goto destroy; + } + + /* At this point we know the CID the peer is using to talk to us. */ + + if (vsk->local_addr.svm_cid == VMADDR_CID_ANY) + vsk->local_addr.svm_cid = pkt->dg.dst.context; + + /* Setup the notify ops to be the highest supported version that both + * the server and the client support. + */ + + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_proto = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE) + old_proto = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2) + old_proto = false; + + } + + if (old_proto) + version = VSOCK_PROTO_INVALID; + else + version = pkt->proto; + + if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) { + err = -EINVAL; + goto destroy; + } + + /* Subscribe to attach and detach events first. 
+ * + * XXX We attach once for each queue pair created for now so it is easy + * to find the socket (it's provided), but later we should only + * subscribe once and add a way to lookup sockets by queue pair handle. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, + vmci_transport_peer_attach_cb, + sk, &attach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + sk, &detach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + /* Make VMCI select the handle for us. */ + handle = VMCI_INVALID_HANDLE; + is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid; + flags = is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc(&qpair, + &handle, + pkt->u.size, + pkt->u.size, + vsk->remote_addr.svm_cid, + flags, + vmci_transport_is_trusted( + vsk, + vsk-> + remote_addr.svm_cid)); + if (err < 0) + goto destroy; + + err = vmci_transport_send_qp_offer(sk, handle); + if (err < 0) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + vmci_trans(vsk)->qp_handle = handle; + vmci_trans(vsk)->qpair = qpair; + + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = + pkt->u.size; + + vmci_trans(vsk)->attach_sub_id = attach_sub_id; + vmci_trans(vsk)->detach_sub_id = detach_sub_id; + + vmci_trans(vsk)->notify_ops->process_negotiate(sk); + + return 0; + +destroy: + if (attach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(attach_sub_id); + + if (detach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(detach_sub_id); + + if (!vmci_handle_is_invalid(handle)) + vmci_qpair_detach(&qpair); + + return err; +} + +static int +vmci_transport_recv_connecting_client_invalid(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err = 0; + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsk->sent_request) { + 
vsk->sent_request = false; + vsk->ignore_connecting_rst = true; + + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) + err = vmci_transport_error_to_vsock_error(err); + else + err = 0; + + } + + return err; +} + +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + bool pkt_processed = false; + + /* In cases where we are closing the connection, it's sufficient to + * mark the state change (and maybe error) and wake up any waiting + * threads. Since this is a connected socket, it's owned by a user + * process and will be cleaned up when the failure is passed back on + * the current or next system call. Our system call implementations + * must therefore check for error and state changes on entry and when + * being awoken. + */ + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + if (pkt->u.mode) { + vsk = vsock_sk(sk); + + vsk->peer_shutdown |= pkt->u.mode; + sk->sk_state_change(sk); + } + break; + + case VMCI_TRANSPORT_PACKET_TYPE_RST: + vsk = vsock_sk(sk); + /* It is possible that we sent our peer a message (e.g a + * WAITING_READ) right before we got notified that the peer had + * detached. If that happens then we can get a RST pkt back + * from our peer even though there is data available for us to + * read. In that case, don't shutdown the socket completely but + * instead allow the local client to finish reading data off + * the queuepair. Always treat a RST pkt in connected mode like + * a clean shutdown. 
+ */ + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + + sk->sk_state_change(sk); + break; + + default: + vsk = vsock_sk(sk); + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, false, NULL, NULL, + &pkt_processed); + if (!pkt_processed) + return -EINVAL; + + break; + } + + return 0; +} + +static int vmci_transport_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL); + if (!vsk->trans) + return -ENOMEM; + + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qpair = NULL; + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; + vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = + VMCI_INVALID_ID; + vmci_trans(vsk)->notify_ops = NULL; + if (psk) { + vmci_trans(vsk)->queue_pair_size = + vmci_trans(psk)->queue_pair_size; + vmci_trans(vsk)->queue_pair_min_size = + vmci_trans(psk)->queue_pair_min_size; + vmci_trans(vsk)->queue_pair_max_size = + vmci_trans(psk)->queue_pair_max_size; + } else { + vmci_trans(vsk)->queue_pair_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE; + vmci_trans(vsk)->queue_pair_min_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN; + vmci_trans(vsk)->queue_pair_max_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX; + } + + return 0; +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); + vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; + } + + if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; + } + + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + vmci_qpair_detach(&vmci_trans(vsk)->qpair); + vmci_trans(vsk)->qp_handle = 
VMCI_INVALID_HANDLE; + vmci_trans(vsk)->produce_size = 0; + vmci_trans(vsk)->consume_size = 0; + } + + if (vmci_trans(vsk)->notify_ops) + vmci_trans(vsk)->notify_ops->socket_destruct(vsk); + + kfree(vsk->trans); + vsk->trans = NULL; +} + +static void vmci_transport_release(struct vsock_sock *vsk) +{ + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { + vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + } +} + +static int vmci_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + u32 port; + u32 flags; + int err; + + /* VMCI will select a resource ID for us if we provide + * VMCI_INVALID_ID. + */ + port = addr->svm_port == VMADDR_PORT_ANY ? + VMCI_INVALID_ID : addr->svm_port; + + if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + flags = addr->svm_cid == VMADDR_CID_ANY ? + VMCI_FLAG_ANYCID_DG_HND : 0; + + err = vmci_transport_datagram_create_hnd(port, flags, + vmci_transport_recv_dgram_cb, + &vsk->sk, + &vmci_trans(vsk)->dg_handle); + if (err < VMCI_SUCCESS) + return vmci_transport_error_to_vsock_error(err); + vsock_addr_init(&vsk->local_addr, addr->svm_cid, + vmci_trans(vsk)->dg_handle.resource); + + return 0; +} + +static int vmci_transport_dgram_enqueue( + struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct iovec *iov, + size_t len) +{ + int err; + struct vmci_datagram *dg; + + if (len > VMCI_MAX_DG_PAYLOAD_SIZE) + return -EMSGSIZE; + + if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid)) + return -EPERM; + + /* Allocate a buffer for the user's message and our packet header. 
*/ + dg = kmalloc(len + sizeof(*dg), GFP_KERNEL); + if (!dg) + return -ENOMEM; + + memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len); + + dg->dst = vmci_make_handle(remote_addr->svm_cid, + remote_addr->svm_port); + dg->src = vmci_make_handle(vsk->local_addr.svm_cid, + vsk->local_addr.svm_port); + dg->payload_size = len; + + err = vmci_datagram_send(dg); + kfree(dg); + if (err < 0) + return vmci_transport_error_to_vsock_error(err); + + return err - sizeof(*dg); +} + +static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, + struct vsock_sock *vsk, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + int noblock; + struct vmci_datagram *dg; + size_t payload_len; + struct sk_buff *skb; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + msg->msg_namelen = 0; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + + if (!skb) + return -EAGAIN; + + dg = (struct vmci_datagram *)skb->data; + if (!dg) + /* err is 0, meaning we read zero bytes. */ + goto out; + + payload_len = dg->payload_size; + /* Ensure the sk_buff matches the payload size claimed in the packet. */ + if (payload_len != skb->len - sizeof(*dg)) { + err = -EINVAL; + goto out; + } + + if (payload_len > len) { + payload_len = len; + msg->msg_flags |= MSG_TRUNC; + } + + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov, + payload_len); + if (err) + goto out; + + if (msg->msg_name) { + struct sockaddr_vm *vm_addr; + + /* Provide the address of the sender. 
*/ + vm_addr = (struct sockaddr_vm *)msg->msg_name; + vsock_addr_init(vm_addr, dg->src.context, dg->src.resource); + msg->msg_namelen = sizeof(*vm_addr); + } + err = payload_len; + +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} + +static bool vmci_transport_dgram_allow(u32 cid, u32 port) +{ + if (cid == VMADDR_CID_HYPERVISOR) { + /* Registrations of PBRPC Servers do not modify VMX/Hypervisor + * state and are allowed. + */ + return port == VMCI_UNITY_PBRPC_REGISTER; + } + + return true; +} + +static int vmci_transport_connect(struct vsock_sock *vsk) +{ + int err; + bool old_pkt_proto = false; + struct sock *sk = &vsk->sk; + + if (vmci_transport_old_proto_override(&old_pkt_proto) && + old_pkt_proto) { + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + } else { + int supported_proto_versions = + vmci_transport_new_proto_supported_versions(); + err = vmci_transport_send_conn_request2( + sk, vmci_trans(vsk)->queue_pair_size, + supported_proto_versions); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + + vsk->sent_request = true; + } + + return err; +} + +static ssize_t vmci_transport_stream_dequeue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len, + int flags) +{ + if (flags & MSG_PEEK) + return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0); + else + return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static ssize_t vmci_transport_stream_enqueue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len) +{ + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) +{ + return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair); +} + +static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk) +{ + return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair); +} + +static u64 vmci_transport_stream_rcvhiwat(struct 
vsock_sock *vsk) +{ + return vmci_trans(vsk)->consume_size; +} + +static bool vmci_transport_stream_is_active(struct vsock_sock *vsk) +{ + return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle); +} + +static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_size; +} + +static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_min_size; +} + +static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_max_size; +} + +static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_min_size) + vmci_trans(vsk)->queue_pair_min_size = val; + if (val > vmci_trans(vsk)->queue_pair_max_size) + vmci_trans(vsk)->queue_pair_max_size = val; + vmci_trans(vsk)->queue_pair_size = val; +} + +static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val > vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_min_size = val; +} + +static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_max_size = val; +} + +static int vmci_transport_notify_poll_in( + struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + return vmci_trans(vsk)->notify_ops->poll_in( + &vsk->sk, target, data_ready_now); +} + +static int vmci_transport_notify_poll_out( + struct vsock_sock *vsk, + size_t target, + bool *space_available_now) +{ + return vmci_trans(vsk)->notify_ops->poll_out( + &vsk->sk, target, space_available_now); +} + +static int vmci_transport_notify_recv_init( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_init( + &vsk->sk, target, + (struct 
vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_block( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_block( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_dequeue( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_dequeue( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_post_dequeue( + struct vsock_sock *vsk, + size_t target, + ssize_t copied, + bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_post_dequeue( + &vsk->sk, target, copied, data_read, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_send_init( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_init( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_block( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_block( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_enqueue( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_enqueue( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_post_enqueue( + struct vsock_sock *vsk, + ssize_t written, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_post_enqueue( + &vsk->sk, written, + (struct vmci_transport_send_notify_data *)data); +} + +static bool 
vmci_transport_old_proto_override(bool *old_pkt_proto) +{ + if (PROTOCOL_OVERRIDE != -1) { + if (PROTOCOL_OVERRIDE == 0) + *old_pkt_proto = true; + else + *old_pkt_proto = false; + + pr_info("Proto override in use\n"); + return true; + } + + return false; +} + +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, + u16 *proto, + bool old_pkt_proto) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (old_pkt_proto) { + if (*proto != VSOCK_PROTO_INVALID) { + pr_err("Can't set both an old and new protocol\n"); + return false; + } + vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops; + goto exit; + } + + switch (*proto) { + case VSOCK_PROTO_PKT_ON_NOTIFY: + vmci_trans(vsk)->notify_ops = + &vmci_transport_notify_pkt_q_state_ops; + break; + default: + pr_err("Unknown notify protocol version\n"); + return false; + } + +exit: + vmci_trans(vsk)->notify_ops->socket_init(sk); + return true; +} + +static u16 vmci_transport_new_proto_supported_versions(void) +{ + if (PROTOCOL_OVERRIDE != -1) + return PROTOCOL_OVERRIDE; + + return VSOCK_PROTO_ALL_SUPPORTED; +} + +static u32 vmci_transport_get_local_cid(void) +{ + return vmci_get_context_id(); +} + +static struct vsock_transport vmci_transport = { + .init = vmci_transport_socket_init, + .destruct = vmci_transport_destruct, + .release = vmci_transport_release, + .connect = vmci_transport_connect, + .dgram_bind = vmci_transport_dgram_bind, + .dgram_dequeue = vmci_transport_dgram_dequeue, + .dgram_enqueue = vmci_transport_dgram_enqueue, + .dgram_allow = vmci_transport_dgram_allow, + .stream_dequeue = vmci_transport_stream_dequeue, + .stream_enqueue = vmci_transport_stream_enqueue, + .stream_has_data = vmci_transport_stream_has_data, + .stream_has_space = vmci_transport_stream_has_space, + .stream_rcvhiwat = vmci_transport_stream_rcvhiwat, + .stream_is_active = vmci_transport_stream_is_active, + .stream_allow = vmci_transport_stream_allow, + .notify_poll_in = vmci_transport_notify_poll_in, + .notify_poll_out = 
vmci_transport_notify_poll_out, + .notify_recv_init = vmci_transport_notify_recv_init, + .notify_recv_pre_block = vmci_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue, + .notify_send_init = vmci_transport_notify_send_init, + .notify_send_pre_block = vmci_transport_notify_send_pre_block, + .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue, + .shutdown = vmci_transport_shutdown, + .set_buffer_size = vmci_transport_set_buffer_size, + .set_min_buffer_size = vmci_transport_set_min_buffer_size, + .set_max_buffer_size = vmci_transport_set_max_buffer_size, + .get_buffer_size = vmci_transport_get_buffer_size, + .get_min_buffer_size = vmci_transport_get_min_buffer_size, + .get_max_buffer_size = vmci_transport_get_max_buffer_size, + .get_local_cid = vmci_transport_get_local_cid, +}; + +static int __init vmci_transport_init(void) +{ + int err; + + /* Create the datagram handle that we will use to send and receive all + * VSocket control messages for this context. + */ + err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID, + VMCI_FLAG_ANYCID_DG_HND, + vmci_transport_recv_stream_cb, + NULL, + &vmci_transport_stream_handle); + if (err < VMCI_SUCCESS) { + pr_err("Unable to create datagram handle. (%d)\n", err); + return vmci_transport_error_to_vsock_error(err); + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED, + vmci_transport_qp_resumed_cb, + NULL, &vmci_transport_qp_resumed_sub_id); + if (err < VMCI_SUCCESS) { + pr_err("Unable to subscribe to resumed event. 
(%d)\n", err); + err = vmci_transport_error_to_vsock_error(err); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + goto err_destroy_stream_handle; + } + + err = vsock_core_init(&vmci_transport); + if (err < 0) + goto err_unsubscribe; + + return 0; + +err_unsubscribe: + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); +err_destroy_stream_handle: + vmci_datagram_destroy_handle(vmci_transport_stream_handle); + return err; +} +module_init(vmci_transport_init); + +static void __exit vmci_transport_exit(void) +{ + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { + if (vmci_datagram_destroy_handle( + vmci_transport_stream_handle) != VMCI_SUCCESS) + pr_err("Couldn't destroy datagram handle\n"); + vmci_transport_stream_handle = VMCI_INVALID_HANDLE; + } + + if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + } + + vsock_core_exit(); +} +module_exit(vmci_transport_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("vmware_vsock"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h new file mode 100644 index 0000000..1bf9918 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.h @@ -0,0 +1,139 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _VMCI_TRANSPORT_H_ +#define _VMCI_TRANSPORT_H_ + +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> + +#include "vsock_addr.h" +#include "af_vsock.h" + +/* If the packet format changes in a release then this should change too. */ +#define VMCI_TRANSPORT_PACKET_VERSION 1 + +/* The resource ID on which control packets are sent. */ +#define VMCI_TRANSPORT_PACKET_RID 1 + +#define VSOCK_PROTO_INVALID 0 +#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) +#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) + +#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans)) + +enum vmci_transport_packet_type { + VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + VMCI_TRANSPORT_PACKET_TYPE_OFFER, + VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, + VMCI_TRANSPORT_PACKET_TYPE_READ, + VMCI_TRANSPORT_PACKET_TYPE_RST, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + VMCI_TRANSPORT_PACKET_TYPE_MAX +}; + +struct vmci_transport_waiting_info { + u64 generation; + u64 offset; +}; + +/* Control packet type for STREAM sockets. DGRAMs have no control packets nor + * special packet header for data packets, they are just raw VMCI DGRAM + * messages. For STREAMs, control packets are sent over the control channel + * while data is written and read directly from queue pairs with no packet + * format. 
+ */ +struct vmci_transport_packet { + struct vmci_datagram dg; + u8 version; + u8 type; + u16 proto; + u32 src_port; + u32 dst_port; + u32 _reserved2; + union { + u64 size; + u64 mode; + struct vmci_handle handle; + struct vmci_transport_waiting_info wait; + } u; +}; + +struct vmci_transport_notify_pkt { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_read; + bool peer_waiting_write; + bool peer_waiting_write_detected; + bool sent_waiting_read; + bool sent_waiting_write; + struct vmci_transport_waiting_info peer_waiting_read_info; + struct vmci_transport_waiting_info peer_waiting_write_info; + u64 produce_q_generation; + u64 consume_q_generation; +}; + +struct vmci_transport_notify_pkt_q_state { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_write; + bool peer_waiting_write_detected; +}; + +union vmci_transport_notify { + struct vmci_transport_notify_pkt pkt; + struct vmci_transport_notify_pkt_q_state pkt_q_state; +}; + +/* Our transport-specific data. */ +struct vmci_transport { + /* For DGRAMs. */ + struct vmci_handle dg_handle; + /* For STREAMs. 
*/ + struct vmci_handle qp_handle; + struct vmci_qp *qpair; + u64 produce_size; + u64 consume_size; + u64 queue_pair_size; + u64 queue_pair_min_size; + u64 queue_pair_max_size; + u32 attach_sub_id; + u32 detach_sub_id; + union vmci_transport_notify notify; + struct vmci_transport_notify_ops *notify_ops; +}; + +int vmci_transport_register(void); +void vmci_transport_unregister(void); + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_wrote(struct sock *sk); +int vmci_transport_send_read(struct sock *sk); +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait); +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait); + +#endif diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c new file mode 100644 index 0000000..9a73074 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -0,0 +1,680 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); +#else + notify_limit = 0; +#endif + + /* For now we ignore the wait information and just see if the free + * space exceeds the notify limit. Note that improving this function + * to be more intelligent will not require a protocol change and will + * retain compatibility between endpoints with mixed versions of this + * function. + * + * The notify_limit is used to delay notifications in the case where + * flow control is enabled. 
Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (retval) { + /* + * Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } +#endif + return retval; +#else + return true; +#endif +} + +static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + if (!PKT_FIELD(vsk, peer_waiting_read)) + return false; + + /* For now we ignore the wait information and just see if there is any + * data for our peer to read. Note that improving this function to be + * more intelligent will not require a protocol change and will retain + * compatibility between endpoints with mixed versions of this + * function. 
+ */ + return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0; +#else + return true; +#endif +} + +static void +vmci_transport_handle_waiting_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_read) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + + if (vmci_transport_notify_waiting_read(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_wrote_bh(dst, src) > 0; + else + sent = vmci_transport_send_wrote(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_read) = false; + } +#endif +} + +static void +vmci_transport_handle_waiting_write(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_write) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); + + if (vmci_transport_notify_waiting_write(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_read_bh(dst, src) > 0; + else + sent = vmci_transport_send_read(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_write) = false; + } +#endif +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_write) = false; +#endif + + sk->sk_write_space(sk); +} + +static bool send_waiting_read(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct 
vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_read)) + return true; + + if (PKT_FIELD(vsk, write_notify_window) < + vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); + + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->consume_size - head; + if (room_needed >= room_left) { + waiting_info.offset = room_needed - room_left; + waiting_info.generation = + PKT_FIELD(vsk, consume_q_generation) + 1; + } else { + waiting_info.offset = head + room_needed; + waiting_info.generation = PKT_FIELD(vsk, consume_q_generation); + } + + ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_read) = true; + + return ret; +#else + return true; +#endif +} + +static bool send_waiting_write(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_write)) + return true; + + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->produce_size - tail; + if (room_needed + 1 >= room_left) { + /* Wraps around to current generation. 
*/
+		waiting_info.offset = room_needed + 1 - room_left;
+		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
+	} else {
+		waiting_info.offset = tail + room_needed + 1;
+		waiting_info.generation =
+		    PKT_FIELD(vsk, produce_q_generation) - 1;
+	}
+
+	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
+	if (ret)
+		PKT_FIELD(vsk, sent_waiting_write) = true;
+
+	return ret;
+#else
+	return true;
+#endif
+}
+
+/* Send a READ notification to the peer when
+ * vmci_transport_notify_waiting_write() reports that one is due.
+ *
+ * vmci_transport_send_read() is retried until it succeeds, until
+ * RCV_SHUTDOWN is set in vsk->peer_shutdown, or until
+ * VMCI_TRANSPORT_MAX_DGRAM_RESENDS attempts have been made.
+ *
+ * Returns the result of the last send attempt, or 0 if no attempt was made.
+ */
+static int vmci_transport_send_read_notification(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+	bool sent_read;
+	unsigned int retries;
+	int err;
+
+	vsk = vsock_sk(sk);
+	sent_read = false;
+	retries = 0;
+	err = 0;
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		/* Notify the peer that we have read, retrying the send on
+		 * failure up to our maximum value. XXX For now we just log
+		 * the failure, but later we should schedule a work item to
+		 * handle the resend until it succeeds. That would require
+		 * keeping track of work items in the vsk and cleaning them up
+		 * upon socket close.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_read &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_read(sk);
+			if (err >= 0)
+				sent_read = true;
+
+			retries++;
+		}
+
+		/* Only report failure when every attempt failed: a send that
+		 * succeeds on the final retry also leaves retries at the
+		 * maximum. Both branches are braced so that the else still
+		 * has a body when VSOCK_OPTIMIZATION_WAITING_NOTIFY is not
+		 * defined.
+		 */
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read) {
+			pr_err("%p unable to send read notify to peer\n", sk);
+		} else {
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+#endif
+		}
+	}
+	return err;
+}
+
+static void
+vmci_transport_handle_wrote(struct sock *sk,
+			    struct vmci_transport_packet *pkt,
+			    bool bottom_half,
+			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk = vsock_sk(sk);
+	PKT_FIELD(vsk, sent_waiting_read) = false;
+#endif
+	sk->sk_data_ready(sk, 0);
+}
+
+static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_read) = false;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	PKT_FIELD(vsk, sent_waiting_read) = false;
+	PKT_FIELD(vsk, sent_waiting_write) = false;
+	PKT_FIELD(vsk, produce_q_generation) = 0;
+	PKT_FIELD(vsk, consume_q_generation) = 0;
+
+	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
+	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
+}
+
+static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
+{
+}
+
+static int
+vmci_transport_notify_pkt_poll_in(struct sock *sk,
+				  size_t target, bool *data_ready_now)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsock_stream_has_data(vsk)) {
+		*data_ready_now = true;
+	} else {
+		/* We can't read right now because there is nothing in the
+		 * queue.
Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) { + if (!send_waiting_read(sk, 1)) + return -1; + + } + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Notify the peer that we are waiting if the queue is full. We + * only send a waiting write if the queue is full because + * otherwise we end up in an infinite WAITING_WRITE, READ, + * WAITING_WRITE, READ, etc. loop. Treat failing to send the + * notification as a socket error, passing that back through + * the mask. + */ + if (!send_waiting_write(sk, 1)) + return -1; + + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. 
+ */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } +#endif +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + /* Notify our peer that we are waiting for data to read. */ + if (!send_waiting_read(sk, target)) { + err = -EHOSTUNREACH; + return err; + } +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + data->notify_on_block = false; + } +#endif + + return err; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* Now consume up to len bytes from the queue. Note that since we have + * the socket locked we should copy at least ready bytes. + */ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note + * that this is safe since we hold the socket lock across the + * two queue pair operations. 
+		 */
+		if (copied >=
+			vmci_trans(vsk)->consume_size - data->consume_head)
+			PKT_FIELD(vsk, consume_q_generation)++;
+#endif
+
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+	}
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_send_init(
+			struct sock *sk,
+			struct vmci_transport_send_notify_data *data)
+{
+#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
+	data->consume_head = 0;
+	data->produce_tail = 0;
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_block(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	/* Notify our peer that we are waiting for room to write. */
+	if (!send_waiting_write(sk, 1))
+		return -EHOSTUNREACH;
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_enqueue(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
+				       &data->produce_tail,
+				       &data->consume_head);
+#endif
+
+	return 0;
+}
+
+/* Post-enqueue hook for the packet-based notify protocol.
+ *
+ * Bumps produce_q_generation when 'written' reaches or passes the end of the
+ * produce queue (measured from data->produce_tail), then, if
+ * vmci_transport_notify_waiting_read() reports that a notification is due,
+ * sends a WROTE packet via vmci_transport_send_wrote(). The send is retried
+ * until it succeeds, until RCV_SHUTDOWN is set in vsk->peer_shutdown, or
+ * until VMCI_TRANSPORT_MAX_DGRAM_RESENDS attempts have been made.
+ *
+ * Returns the result of the last send attempt, or 0 if no attempt was made.
+ */
+static int
+vmci_transport_notify_pkt_send_post_enqueue(
+				struct sock *sk,
+				ssize_t written,
+				struct vmci_transport_send_notify_data *data)
+{
+	int err = 0;
+	struct vsock_sock *vsk;
+	bool sent_wrote = false;
+	int retries = 0;
+
+	vsk = vsock_sk(sk);
+
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	/* Detect a wrap-around to maintain queue generation. Note that this
+	 * is safe since we hold the socket lock across the two queue pair
+	 * operations.
+	 */
+	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
+		PKT_FIELD(vsk, produce_q_generation)++;
+
+#endif
+
+	if (vmci_transport_notify_waiting_read(vsk)) {
+		/* Notify the peer that we have written, retrying the send on
+		 * failure up to our maximum value. See the XXX comment for the
+		 * corresponding piece of code in StreamRecvmsg() for potential
+		 * improvements.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_wrote &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_wrote(sk);
+			if (err >= 0)
+				sent_wrote = true;
+
+			retries++;
+		}
+
+		/* Only treat this as a failure when every attempt failed: a
+		 * send that succeeds on the final retry also leaves retries
+		 * at the maximum and must still clear peer_waiting_read.
+		 */
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) {
+			pr_err("%p unable to send wrote notify to peer\n", sk);
+			return err;
+		} else {
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+			PKT_FIELD(vsk, peer_waiting_read) = false;
+#endif
+		}
+	}
+	return err;
+}
+
+static void
+vmci_transport_notify_pkt_handle_pkt(
+			struct sock *sk,
+			struct vmci_transport_packet *pkt,
+			bool bottom_half,
+			struct sockaddr_vm *dst,
+			struct sockaddr_vm *src, bool *pkt_processed)
+{
+	bool processed = false;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
+		vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
+						    dst, src);
+		processed = true;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
+		vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
+						   dst, src);
+		processed = true;
+		break;
+	}
+
+	if (pkt_processed)
+		*pkt_processed = processed;
+}
+
+static void vmci_transport_notify_pkt_process_request(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		
PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +/* Socket control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h new file mode 100644 index 0000000..7df7932 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -0,0 +1,83 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __VMCI_TRANSPORT_NOTIFY_H__ +#define __VMCI_TRANSPORT_NOTIFY_H__ + +#include <linux/types.h> +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> +#include <linux/vm_sockets.h> + +#include "vmci_transport.h" + +/* Comment this out to compare with old protocol. 
*/ +#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1 +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) +/* Comment this out to remove flow control for "new" protocol */ +#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1 +#endif + +#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS 10 + +struct vmci_transport_recv_notify_data { + u64 consume_head; + u64 produce_tail; + bool notify_on_block; +}; + +struct vmci_transport_send_notify_data { + u64 consume_head; + u64 produce_tail; +}; + +/* Socket notification callbacks. */ +struct vmci_transport_notify_ops { + void (*socket_init) (struct sock *sk); + void (*socket_destruct) (struct vsock_sock *vsk); + int (*poll_in) (struct sock *sk, size_t target, + bool *data_ready_now); + int (*poll_out) (struct sock *sk, size_t target, + bool *space_avail_now); + void (*handle_notify_pkt) (struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, struct sockaddr_vm *dst, + struct sockaddr_vm *src, + bool *pkt_processed); + int (*recv_init) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_block) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_dequeue) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_post_dequeue) (struct sock *sk, size_t target, + ssize_t copied, bool data_read, + struct vmci_transport_recv_notify_data *data); + int (*send_init) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_block) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_enqueue) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_post_enqueue) (struct sock *sk, ssize_t written, + struct vmci_transport_send_notify_data *data); + void (*process_request) (struct sock *sk); + void (*process_negotiate) (struct sock *sk); +}; + +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern struct 
vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; + +#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c new file mode 100644 index 0000000..622bd7a --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -0,0 +1,438 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) \ + (vmci_trans(vsk)->notify.pkt_q_state.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. 
+ */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); + + /* The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; + + if (retval) { + /* Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. 
+ */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } + return retval; +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_write_space(sk); +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_data_ready(sk, 0); +} + +static void vsock_block_update_write_window(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. 
+ */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read) + pr_err("%p unable to send read notification to peer\n", + sk); + else + PKT_FIELD(vsk, peer_waiting_write) = false; + + } + return err; +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) + vsock_block_update_write_window(sk); + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Nothing else to do. 
+ */ + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + data->consume_head = 0; + data->produce_tail = 0; + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + vsock_block_update_write_window(sk); + + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + data->notify_on_block = false; + } + + return err; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + bool was_full = false; + u64 free_space; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { + smp_mb(); + + free_space = + vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair); + was_full = free_space == copied; + + if (was_full) + PKT_FIELD(vsk, peer_waiting_write) = true; + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + /* See the comment in + * vmci_transport_notify_pkt_send_post_enqueue(). 
+ */ + sk->sk_data_ready(sk, 0); + } + + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + data->consume_head = 0; + data->produce_tail = 0; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + bool was_empty; + int retries = 0; + + vsk = vsock_sk(sk); + + smp_mb(); + + was_empty = + vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written; + if (was_empty) { + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) { + pr_err("%p unable to send wrote notification to peer\n", + sk); + return err; + } + + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void 
vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +/* Socket always on control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c new file mode 100644 index 0000000..ec2611b --- /dev/null +++ b/net/vmw_vsock/vsock_addr.c @@ -0,0 +1,76 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2012 VMware, Inc. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vsock_addr.h" + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) +{ + memset(addr, 0, sizeof(*addr)); + addr->svm_family = AF_VSOCK; + addr->svm_cid = cid; + addr->svm_port = port; +} +EXPORT_SYMBOL_GPL(vsock_addr_init); + +int vsock_addr_validate(const struct sockaddr_vm *addr) +{ + if (!addr) + return -EFAULT; + + if (addr->svm_family != AF_VSOCK) + return -EAFNOSUPPORT; + + if (addr->svm_zero[0] != 0) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_addr_validate); + +bool vsock_addr_bound(const struct sockaddr_vm *addr) +{ + return addr->svm_port != VMADDR_PORT_ANY; +} +EXPORT_SYMBOL_GPL(vsock_addr_bound); + +void vsock_addr_unbind(struct sockaddr_vm *addr) +{ + vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); +} +EXPORT_SYMBOL_GPL(vsock_addr_unbind); + +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return addr->svm_cid == other->svm_cid && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); + +int vsock_addr_cast(const struct sockaddr *addr, + size_t len, struct sockaddr_vm **out_addr) +{ + if (len < sizeof(**out_addr)) + return -EFAULT; + + *out_addr = (struct sockaddr_vm *)addr; + return vsock_addr_validate(*out_addr); +} +EXPORT_SYMBOL_GPL(vsock_addr_cast); diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h new file mode 100644 index 
0000000..9ccd531 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.h @@ -0,0 +1,30 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_ADDR_H_ +#define _VSOCK_ADDR_H_ + +#include <linux/vm_sockets.h> + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port); +int vsock_addr_validate(const struct sockaddr_vm *addr); +bool vsock_addr_bound(const struct sockaddr_vm *addr); +void vsock_addr_unbind(struct sockaddr_vm *addr); +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +int vsock_addr_cast(const struct sockaddr *addr, size_t len, + struct sockaddr_vm **out_addr); + +#endif diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig deleted file mode 100644 index a157a2e..0000000 --- a/net/wanrouter/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# -# Configuration for WAN router -# - -config WAN_ROUTER - tristate "WAN router (DEPRECATED)" - depends on EXPERIMENTAL - ---help--- - Wide Area Networks (WANs), such as X.25, frame relay and leased - lines, are used to interconnect Local Area Networks (LANs) over vast - distances with data transfer rates significantly higher than those - achievable with commonly used asynchronous modem connections. - Usually, a quite expensive external device called a `WAN router' is - needed to connect to a WAN. - - As an alternative, WAN routing can be built into the Linux kernel. 
- With relatively inexpensive WAN interface cards available on the - market, a perfectly usable router can be built for less than half - the price of an external router. If you have one of those cards and - wish to use your Linux box as a WAN router, say Y here and also to - the WAN driver for your card, below. You will then need the - wan-tools package which is available from <ftp://ftp.sangoma.com/>. - - To compile WAN routing support as a module, choose M here: the - module will be called wanrouter. - - If unsure, say N. diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile deleted file mode 100644 index 4da14bc..0000000 --- a/net/wanrouter/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux WAN router layer. -# - -obj-$(CONFIG_WAN_ROUTER) += wanrouter.o - -wanrouter-y := wanproc.o wanmain.o diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel deleted file mode 100644 index c043eea..0000000 --- a/net/wanrouter/patchlevel +++ /dev/null @@ -1 +0,0 @@ -2.2.1 diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c deleted file mode 100644 index 2ab7850..0000000 --- a/net/wanrouter/wanmain.c +++ /dev/null @@ -1,782 +0,0 @@ -/***************************************************************************** -* wanmain.c WAN Multiprotocol Router Module. Main code. -* -* This module is completely hardware-independent and provides -* the following common services for the WAN Link Drivers: -* o WAN device management (registering, unregistering) -* o Network interface management -* o Physical connection management (dial-up, incoming calls) -* o Logical connection management (switched virtual circuits) -* o Protocol encapsulation/decapsulation -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. 
-* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Nov 24, 2000 Nenad Corbic Updated for 2.4.X kernels -* Nov 07, 2000 Nenad Corbic Fixed the Mulit-Port PPP for kernels 2.2.16 and -* greater. -* Aug 2, 2000 Nenad Corbic Block the Multi-Port PPP from running on -* kernels 2.2.16 or greater. The SyncPPP -* has changed. -* Jul 13, 2000 Nenad Corbic Added SyncPPP support -* Added extra debugging in device_setup(). -* Oct 01, 1999 Gideon Hack Update for s514 PCI card -* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1 -* Jun 27, 1997 Alan Cox realigned with vendor code -* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 -* Apr 20, 1998 Alan Cox Fixed 2.1 symbols -* May 17, 1998 K. Baranowski Fixed SNAP encapsulation in wan_encapsulate -* Dec 15, 1998 Arnaldo Melo support for firmwares of up to 128000 bytes -* check wandev->setup return value -* Dec 22, 1998 Arnaldo Melo vmalloc/vfree used in device_setup to allocate -* kernel memory and copy configuration data to -* kernel space (for big firmwares) -* Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. -*****************************************************************************/ - -#include <linux/stddef.h> /* offsetof(), etc. 
*/ -#include <linux/capability.h> -#include <linux/errno.h> /* return codes */ -#include <linux/kernel.h> -#include <linux/module.h> /* support for loadable modules */ -#include <linux/slab.h> /* kmalloc(), kfree() */ -#include <linux/mutex.h> -#include <linux/mm.h> -#include <linux/string.h> /* inline mem*, str* functions */ - -#include <asm/byteorder.h> /* htons(), etc. */ -#include <linux/wanrouter.h> /* WAN router API definitions */ - -#include <linux/vmalloc.h> /* vmalloc, vfree */ -#include <asm/uaccess.h> /* copy_to/from_user */ -#include <linux/init.h> /* __initfunc et al. */ - -#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) - -/* - * Function Prototypes - */ - -/* - * WAN device IOCTL handlers - */ - -static DEFINE_MUTEX(wanrouter_mutex); -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf); -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat); -static int wanrouter_device_shutdown(struct wan_device *wandev); -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf); -static int wanrouter_device_del_if(struct wan_device *wandev, - char __user *u_name); - -/* - * Miscellaneous - */ - -static struct wan_device *wanrouter_find_device(char *name); -static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock); -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock); - - - -/* - * Global Data - */ - -static char wanrouter_fullname[] = "Sangoma WANPIPE Router"; -static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc."; -static char wanrouter_modname[] = ROUTER_NAME; /* short module name */ -struct wan_device* wanrouter_router_devlist; /* list of registered devices */ - -/* - * Organize Unique Identifiers for encapsulation/decapsulation - */ - -#if 0 -static 
unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; -static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; -#endif - -static int __init wanrouter_init(void) -{ - int err; - - printk(KERN_INFO "%s v%u.%u %s\n", - wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE, - wanrouter_copyright); - - err = wanrouter_proc_init(); - if (err) - printk(KERN_INFO "%s: can't create entry in proc filesystem!\n", - wanrouter_modname); - - return err; -} - -static void __exit wanrouter_cleanup (void) -{ - wanrouter_proc_cleanup(); -} - -/* - * This is just plain dumb. We should move the bugger to drivers/net/wan, - * slap it first in directory and make it module_init(). The only reason - * for subsys_initcall() here is that net goes after drivers (why, BTW?) - */ -subsys_initcall(wanrouter_init); -module_exit(wanrouter_cleanup); - -/* - * Kernel APIs - */ - -/* - * Register WAN device. - * o verify device credentials - * o create an entry for the device in the /proc/net/router directory - * o initialize internally maintained fields of the wan_device structure - * o link device data space to a singly-linked list - * o if it's the first device, then start kernel 'thread' - * o increment module use count - * - * Return: - * 0 Ok - * < 0 error. 
- * - * Context: process - */ - - -int register_wan_device(struct wan_device *wandev) -{ - int err, namelen; - - if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) || - (wandev->name == NULL)) - return -EINVAL; - - namelen = strlen(wandev->name); - if (!namelen || (namelen > WAN_DRVNAME_SZ)) - return -EINVAL; - - if (wanrouter_find_device(wandev->name)) - return -EEXIST; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering WAN device %s\n", - wanrouter_modname, wandev->name); -#endif - - /* - * Register /proc directory entry - */ - err = wanrouter_proc_add(wandev); - if (err) { - printk(KERN_INFO - "%s: can't create /proc/net/router/%s entry!\n", - wanrouter_modname, wandev->name); - return err; - } - - /* - * Initialize fields of the wan_device structure maintained by the - * router and update local data. - */ - - wandev->ndev = 0; - wandev->dev = NULL; - wandev->next = wanrouter_router_devlist; - wanrouter_router_devlist = wandev; - return 0; -} - -/* - * Unregister WAN device. - * o shut down device - * o unlink device data space from the linked list - * o delete device entry in the /proc/net/router directory - * o decrement module use count - * - * Return: 0 Ok - * <0 error. - * Context: process - */ - - -int unregister_wan_device(char *name) -{ - struct wan_device *wandev, *prev; - - if (name == NULL) - return -EINVAL; - - for (wandev = wanrouter_router_devlist, prev = NULL; - wandev && strcmp(wandev->name, name); - prev = wandev, wandev = wandev->next) - ; - if (wandev == NULL) - return -ENODEV; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: unregistering WAN device %s\n", - wanrouter_modname, name); -#endif - - if (wandev->state != WAN_UNCONFIGURED) - wanrouter_device_shutdown(wandev); - - if (prev) - prev->next = wandev->next; - else - wanrouter_router_devlist = wandev->next; - - wanrouter_proc_delete(wandev); - return 0; -} - -#if 0 - -/* - * Encapsulate packet. 
- * - * Return: encapsulation header size - * < 0 - unsupported Ethertype - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, - unsigned short type) -{ - int hdr_len = 0; - - switch (type) { - case ETH_P_IP: /* IP datagram encapsulation */ - hdr_len += 1; - skb_push(skb, 1); - skb->data[0] = NLPID_IP; - break; - - case ETH_P_IPX: /* SNAP encapsulation */ - case ETH_P_ARP: - hdr_len += 7; - skb_push(skb, 7); - skb->data[0] = 0; - skb->data[1] = NLPID_SNAP; - skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether, - sizeof(wanrouter_oui_ether)); - *((unsigned short*)&skb->data[5]) = htons(type); - break; - - default: /* Unknown packet type */ - printk(KERN_INFO - "%s: unsupported Ethertype 0x%04X on interface %s!\n", - wanrouter_modname, type, dev->name); - hdr_len = -EINVAL; - } - return hdr_len; -} - - -/* - * Decapsulate packet. - * - * Return: Ethertype (in network order) - * 0 unknown encapsulation - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */ - __be16 ethertype; - - switch (skb->data[cnt]) { - case NLPID_IP: /* IP datagramm */ - ethertype = htons(ETH_P_IP); - cnt += 1; - break; - - case NLPID_SNAP: /* SNAP encapsulation */ - if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether, - sizeof(wanrouter_oui_ether))){ - printk(KERN_INFO - "%s: unsupported SNAP OUI %02X-%02X-%02X " - "on interface %s!\n", wanrouter_modname, - skb->data[cnt+1], skb->data[cnt+2], - skb->data[cnt+3], dev->name); - return 0; - } - ethertype = *((__be16*)&skb->data[cnt+4]); - cnt += 6; - break; - - /* add other protocols, e.g. 
CLNP, ESIS, ISIS, if needed */ - - default: - printk(KERN_INFO - "%s: unsupported NLPID 0x%02X on interface %s!\n", - wanrouter_modname, skb->data[cnt], dev->name); - return 0; - } - skb->protocol = ethertype; - skb->pkt_type = PACKET_HOST; /* Physically point to point */ - skb_pull(skb, cnt); - skb_reset_mac_header(skb); - return ethertype; -} - -#endif /* 0 */ - -/* - * WAN device IOCTL. - * o find WAN device associated with this node - * o execute requested action or pass command to the device driver - */ - -long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int err = 0; - struct proc_dir_entry *dent; - struct wan_device *wandev; - void __user *data = (void __user *)arg; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if ((cmd >> 8) != ROUTER_IOCTL) - return -EINVAL; - - dent = PDE(inode); - if ((dent == NULL) || (dent->data == NULL)) - return -EINVAL; - - wandev = dent->data; - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - mutex_lock(&wanrouter_mutex); - switch (cmd) { - case ROUTER_SETUP: - err = wanrouter_device_setup(wandev, data); - break; - - case ROUTER_DOWN: - err = wanrouter_device_shutdown(wandev); - break; - - case ROUTER_STAT: - err = wanrouter_device_stat(wandev, data); - break; - - case ROUTER_IFNEW: - err = wanrouter_device_new_if(wandev, data); - break; - - case ROUTER_IFDEL: - err = wanrouter_device_del_if(wandev, data); - break; - - case ROUTER_IFSTAT: - break; - - default: - if ((cmd >= ROUTER_USER) && - (cmd <= ROUTER_USER_MAX) && - wandev->ioctl) - err = wandev->ioctl(wandev, cmd, arg); - else err = -EINVAL; - } - mutex_unlock(&wanrouter_mutex); - return err; -} - -/* - * WAN Driver IOCTL Handlers - */ - -/* - * Setup WAN link device. 
- * o verify user address space - * o allocate kernel memory and copy configuration data to kernel space - * o if configuration data includes extension, copy it to kernel space too - * o call driver's setup() entry point - */ - -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf) -{ - void *data = NULL; - wandev_conf_t *conf; - int err = -EINVAL; - - if (wandev->setup == NULL) { /* Nothing to do ? */ - printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n", - wandev->name); - return 0; - } - - conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL); - if (conf == NULL){ - printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n", - wandev->name); - return -ENOBUFS; - } - - if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) { - printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n", - wandev->name); - kfree(conf); - return -EFAULT; - } - - if (conf->magic != ROUTER_MAGIC) { - kfree(conf); - printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n", - wandev->name); - return -EINVAL; - } - - if (conf->data_size && conf->data) { - if (conf->data_size > 128000) { - printk(KERN_INFO - "%s: ERROR, Invalid firmware data size %i !\n", - wandev->name, conf->data_size); - kfree(conf); - return -EINVAL; - } - - data = vmalloc(conf->data_size); - if (!data) { - printk(KERN_INFO - "%s: ERROR, Failed allocate kernel memory !\n", - wandev->name); - kfree(conf); - return -ENOBUFS; - } - if (!copy_from_user(data, conf->data, conf->data_size)) { - conf->data = data; - err = wandev->setup(wandev, conf); - } else { - printk(KERN_INFO - "%s: ERROR, Failed to copy from user data !\n", - wandev->name); - err = -EFAULT; - } - vfree(data); - } else { - printk(KERN_INFO - "%s: ERROR, No firmware found ! Firmware size = %i !\n", - wandev->name, conf->data_size); - } - - kfree(conf); - return err; -} - -/* - * Shutdown WAN device. 
- * o delete all not opened logical channels for this device - * o call driver's shutdown() entry point - */ - -static int wanrouter_device_shutdown(struct wan_device *wandev) -{ - struct net_device *dev; - int err=0; - - if (wandev->state == WAN_UNCONFIGURED) - return 0; - - printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name); - - for (dev = wandev->dev; dev;) { - err = wanrouter_delete_interface(wandev, dev->name); - if (err) - return err; - /* The above function deallocates the current dev - * structure. Therefore, we cannot use netdev_priv(dev) - * as the next element: wandev->dev points to the - * next element */ - dev = wandev->dev; - } - - if (wandev->ndev) - return -EBUSY; /* there are opened interfaces */ - - if (wandev->shutdown) - err=wandev->shutdown(wandev); - - return err; -} - -/* - * Get WAN device status & statistics. - */ - -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat) -{ - wandev_stat_t stat; - - memset(&stat, 0, sizeof(stat)); - - /* Ask device driver to update device statistics */ - if ((wandev->state != WAN_UNCONFIGURED) && wandev->update) - wandev->update(wandev); - - /* Fill out structure */ - stat.ndev = wandev->ndev; - stat.state = wandev->state; - - if (copy_to_user(u_stat, &stat, sizeof(stat))) - return -EFAULT; - - return 0; -} - -/* - * Create new WAN interface. 
- * o verify user address space - * o copy configuration data to kernel address space - * o allocate network interface data space - * o call driver's new_if() entry point - * o make sure there is no interface name conflict - * o register network interface - */ - -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf) -{ - wanif_conf_t *cnf; - struct net_device *dev = NULL; - int err; - - if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL)) - return -ENODEV; - - cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL); - if (!cnf) - return -ENOBUFS; - - err = -EFAULT; - if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t))) - goto out; - - err = -EINVAL; - if (cnf->magic != ROUTER_MAGIC) - goto out; - - if (cnf->config_id == WANCONFIG_MPPP) { - printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n", - wandev->name); - err = -EPROTONOSUPPORT; - goto out; - } else { - err = wandev->new_if(wandev, dev, cnf); - } - - if (!err) { - /* Register network interface. This will invoke init() - * function supplied by the driver. If device registered - * successfully, add it to the interface list. - */ - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); -#endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; - } - if (wandev->del_if) - wandev->del_if(wandev, dev); - free_netdev(dev); - } - -out: - kfree(cnf); - return err; -} - - -/* - * Delete WAN logical channel. 
- * o verify user address space - * o copy configuration data to kernel address space - */ - -static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name) -{ - char name[WAN_IFNAME_SZ + 1]; - int err = 0; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - memset(name, 0, sizeof(name)); - - if (copy_from_user(name, u_name, WAN_IFNAME_SZ)) - return -EFAULT; - - err = wanrouter_delete_interface(wandev, name); - if (err) - return err; - - /* If last interface being deleted, shutdown card - * This helps with administration at leaf nodes - * (You can tell if the person at the other end of the phone - * has an interface configured) and avoids DoS vulnerabilities - * in binary driver files - this fixes a problem with the current - * Sangoma driver going into strange states when all the network - * interfaces are deleted and the link irrecoverably disconnected. - */ - - if (!wandev->ndev && wandev->shutdown) - err = wandev->shutdown(wandev); - - return err; -} - -/* - * Miscellaneous Functions - */ - -/* - * Find WAN device by name. - * Return pointer to the WAN device data space or NULL if device not found. - */ - -static struct wan_device *wanrouter_find_device(char *name) -{ - struct wan_device *wandev; - - for (wandev = wanrouter_router_devlist; - wandev && strcmp(wandev->name, name); - wandev = wandev->next); - return wandev; -} - -/* - * Delete WAN logical channel identified by its name. - * o find logical channel by its name - * o call driver's del_if() entry point - * o unregister network interface - * o unlink channel data space from linked list of channels - * o release channel data space - * - * Return: 0 success - * -ENODEV channel not found. - * -EBUSY interface is open - * - * Note: If (force != 0), then device will be destroyed even if interface - * associated with it is open. It's caller's responsibility to make - * sure that opened interfaces are not removed! 
- */ - -static int wanrouter_delete_interface(struct wan_device *wandev, char *name) -{ - struct net_device *dev = NULL, *prev = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - dev = wandev->dev; - prev = NULL; - while (dev && strcmp(name, dev->name)) { - struct net_device **slave = netdev_priv(dev); - prev = dev; - dev = *slave; - } - unlock_adapter_irq(&wandev->lock, &smp_flags); - - if (dev == NULL) - return -ENODEV; /* interface not found */ - - if (netif_running(dev)) - return -EBUSY; /* interface in use */ - - if (wandev->del_if) - wandev->del_if(wandev, dev); - - lock_adapter_irq(&wandev->lock, &smp_flags); - if (prev) { - struct net_device **prev_slave = netdev_priv(prev); - struct net_device **slave = netdev_priv(dev); - - *prev_slave = *slave; - } else { - struct net_device **slave = netdev_priv(dev); - wandev->dev = *slave; - } - --wandev->ndev; - unlock_adapter_irq(&wandev->lock, &smp_flags); - - printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - - unregister_netdev(dev); - - free_netdev(dev); - - return 0; -} - -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock) -{ - spin_lock_irqsave(lock, *smp_flags); -} - - -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock) -{ - spin_unlock_irqrestore(lock, *smp_flags); -} - -EXPORT_SYMBOL(register_wan_device); -EXPORT_SYMBOL(unregister_wan_device); - -MODULE_LICENSE("GPL"); - -/* - * End - */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c deleted file mode 100644 index c43612e..0000000 --- a/net/wanrouter/wanproc.c +++ /dev/null @@ -1,380 +0,0 @@ -/***************************************************************************** -* wanproc.c WAN Router Module. /proc filesystem interface. -* -* This module is completely hardware-independent and provides -* access to the router using Linux /proc filesystem. 
-* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jun 02, 1999 Gideon Hack Updates for Linux 2.2.X kernels. -* Jun 29, 1997 Alan Cox Merged with 1.0.3 vendor code -* Jan 29, 1997 Gene Kozin v1.0.1. Implemented /proc read routines -* Jan 30, 1997 Alan Cox Hacked around for 2.1 -* Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -*****************************************************************************/ - -#include <linux/init.h> /* __initfunc et al. */ -#include <linux/stddef.h> /* offsetof(), etc. */ -#include <linux/errno.h> /* return codes */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/wanrouter.h> /* WAN router API definitions */ -#include <linux/seq_file.h> -#include <linux/mutex.h> - -#include <net/net_namespace.h> -#include <asm/io.h> - -#define PROC_STATS_FORMAT "%30s: %12lu\n" - -/****** Defines and Macros **************************************************/ - -#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\ - (prot == WANCONFIG_X25) ? " X25" : \ - (prot == WANCONFIG_PPP) ? " PPP" : \ - (prot == WANCONFIG_CHDLC) ? " CHDLC": \ - (prot == WANCONFIG_MPPP) ? " MPPP" : \ - " Unknown" ) - -/****** Function Prototypes *************************************************/ - -#ifdef CONFIG_PROC_FS - -/* Miscellaneous */ - -/* - * Structures for interfacing with the /proc filesystem. 
- * Router creates its own directory /proc/net/router with the following - * entries: - * config device configuration - * status global device statistics - * <device> entry for each WAN device - */ - -/* - * Generic /proc/net/router/<file> file and inode operations - */ - -/* - * /proc/net/router - */ - -static DEFINE_MUTEX(config_mutex); -static struct proc_dir_entry *proc_router; - -/* Strings */ - -/* - * Interface functions - */ - -/****** Proc filesystem entry points ****************************************/ - -/* - * Iterator - */ -static void *r_start(struct seq_file *m, loff_t *pos) -{ - struct wan_device *wandev; - loff_t l = *pos; - - mutex_lock(&config_mutex); - if (!l--) - return SEQ_START_TOKEN; - for (wandev = wanrouter_router_devlist; l-- && wandev; - wandev = wandev->next) - ; - return wandev; -} - -static void *r_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct wan_device *wandev = v; - (*pos)++; - return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next; -} - -static void r_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&config_mutex); -} - -static int config_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name | port |IRQ|DMA| mem.addr |" - "mem.size|option1|option2|option3|option4\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n", - p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize, - p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]); - return 0; -} - -static int status_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name |protocol|station|interface|" - "clocking|baud rate| MTU |ndev|link state\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |", - p->name, - PROT_DECODE(p->config_id), - p->config_id == WANCONFIG_FR ? - (p->station ? 
"Node" : "CPE") : - (p->config_id == WANCONFIG_X25 ? - (p->station ? "DCE" : "DTE") : - ("N/A")), - p->interface ? "V.35" : "RS-232", - p->clocking ? "internal" : "external", - p->bps, - p->mtu, - p->ndev); - - switch (p->state) { - case WAN_UNCONFIGURED: - seq_printf(m, "%-12s\n", "unconfigured"); - break; - case WAN_DISCONNECTED: - seq_printf(m, "%-12s\n", "disconnected"); - break; - case WAN_CONNECTING: - seq_printf(m, "%-12s\n", "connecting"); - break; - case WAN_CONNECTED: - seq_printf(m, "%-12s\n", "connected"); - break; - default: - seq_printf(m, "%-12s\n", "invalid"); - break; - } - return 0; -} - -static const struct seq_operations config_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = config_show, -}; - -static const struct seq_operations status_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = status_show, -}; - -static int config_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &config_op); -} - -static int status_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &status_op); -} - -static const struct file_operations config_fops = { - .owner = THIS_MODULE, - .open = config_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations status_fops = { - .owner = THIS_MODULE, - .open = status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int wandev_show(struct seq_file *m, void *v) -{ - struct wan_device *wandev = m->private; - - if (wandev->magic != ROUTER_MAGIC) - return 0; - - if (!wandev->state) { - seq_puts(m, "device is not configured!\n"); - return 0; - } - - /* Update device statistics */ - if (wandev->update) { - int err = wandev->update(wandev); - if (err == -EAGAIN) { - seq_puts(m, "Device is busy!\n"); - return 0; - } - if (err) { - seq_puts(m, "Device is not configured!\n"); - return 0; - } - } - - seq_printf(m, PROC_STATS_FORMAT, - "total packets 
received", wandev->stats.rx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total packets transmitted", wandev->stats.tx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes received", wandev->stats.rx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes transmitted", wandev->stats.tx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "bad packets received", wandev->stats.rx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "packet transmit problems", wandev->stats.tx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "received frames dropped", wandev->stats.rx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "transmit frames dropped", wandev->stats.tx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "multicast packets received", wandev->stats.multicast); - seq_printf(m, PROC_STATS_FORMAT, - "transmit collisions", wandev->stats.collisions); - seq_printf(m, PROC_STATS_FORMAT, - "receive length errors", wandev->stats.rx_length_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver overrun errors", wandev->stats.rx_over_errors); - seq_printf(m, PROC_STATS_FORMAT, - "CRC errors", wandev->stats.rx_crc_errors); - seq_printf(m, PROC_STATS_FORMAT, - "frame format errors (aborts)", wandev->stats.rx_frame_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver fifo overrun", wandev->stats.rx_fifo_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver missed packet", wandev->stats.rx_missed_errors); - seq_printf(m, PROC_STATS_FORMAT, - "aborted frames transmitted", wandev->stats.tx_aborted_errors); - return 0; -} - -static int wandev_open(struct inode *inode, struct file *file) -{ - return single_open(file, wandev_show, PDE(inode)->data); -} - -static const struct file_operations wandev_fops = { - .owner = THIS_MODULE, - .open = wandev_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .unlocked_ioctl = wanrouter_ioctl, -}; - -/* - * Initialize router proc interface. 
- */ - -int __init wanrouter_proc_init(void) -{ - struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); - if (!proc_router) - goto fail; - - p = proc_create("config", S_IRUGO, proc_router, &config_fops); - if (!p) - goto fail_config; - p = proc_create("status", S_IRUGO, proc_router, &status_fops); - if (!p) - goto fail_stat; - return 0; -fail_stat: - remove_proc_entry("config", proc_router); -fail_config: - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -fail: - return -ENOMEM; -} - -/* - * Clean up router proc interface. - */ - -void wanrouter_proc_cleanup(void) -{ - remove_proc_entry("config", proc_router); - remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -} - -/* - * Add directory entry for WAN device. - */ - -int wanrouter_proc_add(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - wandev->dent = proc_create(wandev->name, S_IRUGO, - proc_router, &wandev_fops); - if (!wandev->dent) - return -ENOMEM; - wandev->dent->data = wandev; - return 0; -} - -/* - * Delete directory entry for WAN device. 
- */ -int wanrouter_proc_delete(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - remove_proc_entry(wandev->name, proc_router); - return 0; -} - -#else - -/* - * No /proc - output stubs - */ - -int __init wanrouter_proc_init(void) -{ - return 0; -} - -void wanrouter_proc_cleanup(void) -{ -} - -int wanrouter_proc_add(struct wan_device *wandev) -{ - return 0; -} - -int wanrouter_proc_delete(struct wan_device *wandev) -{ - return 0; -} - -#endif - -/* - * End - */ - diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 324e8d8..a4a14e8 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -46,3 +46,65 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return err; } + +void cfg80211_ch_switch_notify(struct net_device *dev, + struct cfg80211_chan_def *chandef) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_ch_switch_notify(dev, chandef); + + wdev_lock(wdev); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) + goto out; + + wdev->channel = chandef->chan; + nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); +out: + wdev_unlock(wdev); + return; +} +EXPORT_SYMBOL(cfg80211_ch_switch_notify); + +bool cfg80211_rx_spurious_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; + + trace_cfg80211_rx_spurious_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = nl80211_unexpected_frame(dev, addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; +} +EXPORT_SYMBOL(cfg80211_rx_spurious_frame); + +bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; 
+ + trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = nl80211_unexpected_4addr_frame(dev, addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; +} +EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a7990bb..fd556ac 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -76,6 +76,10 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return false; if (!chandef->center_freq2) return false; + /* adjacent is not allowed -- that's a 160 MHz channel */ + if (chandef->center_freq1 - chandef->center_freq2 == 80 || + chandef->center_freq2 - chandef->center_freq1 == 80) + return false; break; case NL80211_CHAN_WIDTH_80: if (chandef->center_freq1 != control_freq + 30 && @@ -143,6 +147,32 @@ static void chandef_primary_freqs(const struct cfg80211_chan_def *c, } } +static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) +{ + int width; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + width = 20; + break; + case NL80211_CHAN_WIDTH_40: + width = 40; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + width = 80; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + break; + default: + WARN_ON_ONCE(1); + return -1; + } + return width; +} + const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) @@ -188,6 +218,93 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, } EXPORT_SYMBOL(cfg80211_chandef_compatible); +static void cfg80211_set_chans_dfs_state(struct wiphy *wiphy, u32 center_freq, + u32 bandwidth, + enum nl80211_dfs_state dfs_state) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = 
center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c || !(c->flags & IEEE80211_CHAN_RADAR)) + continue; + + c->dfs_state = dfs_state; + c->dfs_state_entered = jiffies; + } +} + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state) +{ + int width; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return; + + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq1, + width, dfs_state); + + if (!chandef->center_freq2) + return; + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq2, + width, dfs_state); +} + +static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_RADAR) + return 1; + } + return 0; +} + + +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return -EINVAL; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; + + r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, + width); + if (r) + return r; + + if (!chandef->center_freq2) + return 0; + + return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, + width); +} + static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, u32 prohibited_flags) @@ -199,7 +316,16 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, freq <= center_freq + bandwidth/2 - 10; freq += 20) { c = ieee80211_get_channel(wiphy, freq); - if (!c || c->flags & 
prohibited_flags) + if (!c) + return false; + + /* check for radar flags */ + if ((prohibited_flags & c->flags & IEEE80211_CHAN_RADAR) && + (c->dfs_state != NL80211_DFS_AVAILABLE)) + return false; + + /* check for the other flags */ + if (c->flags & prohibited_flags & ~IEEE80211_CHAN_RADAR) return false; } @@ -249,6 +375,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, case NL80211_CHAN_WIDTH_80: if (!vht_cap->vht_supported) return false; + prohibited_flags |= IEEE80211_CHAN_NO_80MHZ; width = 80; break; case NL80211_CHAN_WIDTH_160: @@ -256,6 +383,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, return false; if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)) return false; + prohibited_flags |= IEEE80211_CHAN_NO_160MHZ; width = 160; break; default: @@ -263,7 +391,16 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, return false; } - /* TODO: missing regulatory check on 80/160 bandwidth */ + /* + * TODO: What if there are only certain 80/160/80+80 MHz channels + * allowed by the driver, or only certain combinations? + * For 40 MHz the driver can set the NO_HT40 flags, but for + * 80/160 MHz and in particular 80+80 MHz this isn't really + * feasible and we only have NO_80MHZ/NO_160MHZ so far but + * no way to cover 80+80 MHz or more complex restrictions. + * Note that such restrictions also need to be advertised to + * userspace, for example for P2P channel selection. 
+ */ if (width > 20) prohibited_flags |= IEEE80211_CHAN_NO_OFDM; @@ -340,7 +477,10 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { + if (wdev->cac_started) { + *chan = wdev->channel; + *chanmode = CHAN_MODE_SHARED; + } else if (wdev->beacon_interval) { *chan = wdev->channel; *chanmode = CHAN_MODE_SHARED; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 14d9904..6ddf74f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -57,9 +57,6 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *rdev; - if (!wiphy_idx_valid(wiphy_idx)) - return NULL; - assert_cfg80211_lock(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { @@ -74,10 +71,8 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) int get_wiphy_idx(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev; - if (!wiphy) - return WIPHY_IDX_STALE; - rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + return rdev->wiphy_idx; } @@ -86,9 +81,6 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) { struct cfg80211_registered_device *rdev; - if (!wiphy_idx_valid(wiphy_idx)) - return NULL; - assert_cfg80211_lock(); rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); @@ -220,6 +212,39 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) rdev_rfkill_poll(rdev); } +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + lockdep_assert_held(&rdev->devlist_mtx); + lockdep_assert_held(&rdev->sched_scan_mtx); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) + return; + + if (!wdev->p2p_started) + return; + + rdev_stop_p2p_device(rdev, wdev); + wdev->p2p_started = false; + + rdev->opencount--; + + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + bool busy = 
work_busy(&rdev->scan_done_wk); + + /* + * If the work isn't pending or running (in which case it would + * be waiting for the lock we hold) the driver didn't properly + * cancel the scan when the interface was removed. In this case + * warn and leak the scan request object to not crash later. + */ + WARN_ON(!busy); + + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, !busy); + } +} + static int cfg80211_rfkill_set_block(void *data, bool blocked) { struct cfg80211_registered_device *rdev = data; @@ -229,7 +254,8 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) return 0; rtnl_lock(); - mutex_lock(&rdev->devlist_mtx); + + /* read-only iteration need not hold the devlist_mtx */ list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { @@ -239,18 +265,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) /* otherwise, check iftype */ switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + /* but this requires it */ + mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + mutex_unlock(&rdev->sched_scan_mtx); + mutex_unlock(&rdev->devlist_mtx); break; default: break; } } - mutex_unlock(&rdev->devlist_mtx); rtnl_unlock(); return 0; @@ -309,7 +335,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy_idx = wiphy_counter++; - if (unlikely(!wiphy_idx_valid(rdev->wiphy_idx))) { + if (unlikely(rdev->wiphy_idx < 0)) { wiphy_counter--; mutex_unlock(&cfg80211_mutex); /* ugh, wrapped! 
*/ @@ -332,6 +358,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); + INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, + cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT rdev->wiphy.wext = &cfg80211_wext_handler; #endif @@ -390,8 +418,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c = &wiphy->iface_combinations[i]; - /* Combinations with just one interface aren't real */ - if (WARN_ON(c->max_interfaces < 2)) + /* + * Combinations with just one interface aren't real, + * however we make an exception for DFS. + */ + if (WARN_ON((c->max_interfaces < 2) && !c->radar_detect_widths)) return -EINVAL; /* Need at least one channel */ @@ -406,6 +437,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) CFG80211_MAX_NUM_DIFFERENT_CHANNELS)) return -EINVAL; + /* DFS only works on one channel. 
*/ + if (WARN_ON(c->radar_detect_widths && + (c->num_different_channels > 1))) + return -EINVAL; + if (WARN_ON(!c->n_limits)) return -EINVAL; @@ -478,6 +514,11 @@ int wiphy_register(struct wiphy *wiphy) ETH_ALEN))) return -EINVAL; + if (WARN_ON(wiphy->max_acl_mac_addrs && + (!(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME) || + !rdev->ops->set_mac_acl))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); @@ -690,6 +731,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); + cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); if (rdev->wowlan && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); @@ -710,7 +752,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) kfree(reg); } list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list) - cfg80211_put_bss(&scan->pub); + cfg80211_put_bss(&rdev->wiphy, &scan->pub); kfree(rdev); } @@ -737,17 +779,13 @@ static void wdev_cleanup_work(struct work_struct *work) wdev = container_of(work, struct wireless_dev, cleanup_work); rdev = wiphy_to_dev(wdev->wiphy); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { rdev->scan_req->aborted = true; ___cfg80211_scan_done(rdev, true); } - cfg80211_unlock_rdev(rdev); - - mutex_lock(&rdev->sched_scan_mtx); - if (WARN_ON(rdev->sched_scan_req && rdev->sched_scan_req->dev == wdev->netdev)) { __cfg80211_stop_sched_scan(rdev, false); @@ -773,21 +811,19 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) return; mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); list_del_rcu(&wdev->list); rdev->devlist_generation++; switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + cfg80211_stop_p2p_device(rdev, wdev); break; 
default: WARN_ON_ONCE(1); break; } + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -866,8 +902,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; - if (!dev->ethtool_ops) - dev->ethtool_ops = &cfg80211_ethtool_ops; + netdev_set_default_ethtool_ops(dev, &cfg80211_ethtool_ops); if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || @@ -929,6 +964,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, cfg80211_update_iface_num(rdev, wdev->iftype, 1); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT @@ -960,6 +996,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; } wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); diff --git a/net/wireless/core.h b/net/wireless/core.h index 3563097..5845c2b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -8,7 +8,6 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/netdevice.h> -#include <linux/kref.h> #include <linux/rbtree.h> #include <linux/debugfs.h> #include <linux/rfkill.h> @@ -18,6 +17,9 @@ #include <net/cfg80211.h> #include "reg.h" + +#define WIPHY_IDX_INVALID -1 + struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; @@ -84,9 +86,11 @@ struct cfg80211_registered_device { struct cfg80211_wowlan *wowlan; + struct delayed_work dfs_update_channels_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ - struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); + struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline @@ -96,13 +100,6 @@ struct 
cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) return container_of(wiphy, struct cfg80211_registered_device, wiphy); } -/* Note 0 is valid, hence phy0 */ -static inline -bool wiphy_idx_valid(int wiphy_idx) -{ - return wiphy_idx >= 0; -} - static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { @@ -113,6 +110,9 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) for (i = 0; i < rdev->wowlan->n_patterns; i++) kfree(rdev->wowlan->patterns[i].mask); kfree(rdev->wowlan->patterns); + if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) + sock_release(rdev->wowlan->tcp->sock); + kfree(rdev->wowlan->tcp); kfree(rdev->wowlan); } @@ -126,17 +126,12 @@ static inline void assert_cfg80211_lock(void) lockdep_assert_held(&cfg80211_mutex); } -/* - * You can use this to mark a wiphy_idx as not having an associated wiphy. - * It guarantees cfg80211_rdev_by_wiphy_idx(wiphy_idx) will return NULL - */ -#define WIPHY_IDX_STALE -1 - struct cfg80211_internal_bss { struct list_head list; + struct list_head hidden_list; struct rb_node rbn; unsigned long ts; - struct kref ref; + unsigned long refcount; atomic_t hold; /* must be last because of priv member */ @@ -435,7 +430,24 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode); + enum cfg80211_chan_mode chanmode, + u8 radar_detect); + +/** + * cfg80211_chandef_dfs_required - checks if radar detection is required + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * Return: 1 if radar detection is required, 0 if it is not, < 0 on error + */ +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *c); + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state); + +void 
cfg80211_dfs_channels_update_work(struct work_struct *work); + static inline int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, @@ -443,7 +455,7 @@ cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED); + CHAN_MODE_UNDEFINED, 0); } static inline int @@ -460,7 +472,17 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode) { return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode); + chan, chanmode, 0); +} + +static inline unsigned int elapsed_jiffies_msecs(unsigned long start) +{ + unsigned long end = jiffies; + + if (end >= start) + return jiffies_to_msecs(end - start); + + return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); } void @@ -481,6 +503,9 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index 48c48ff..e37862f 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -15,10 +15,10 @@ static void cfg80211_get_drvinfo(struct net_device *dev, strlcpy(info->version, init_utsname()->release, sizeof(info->version)); if (wdev->wiphy->fw_version[0]) - strncpy(info->fw_version, wdev->wiphy->fw_version, + strlcpy(info->fw_version, wdev->wiphy->fw_version, sizeof(info->fw_version)); else - strncpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); strlcpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)), sizeof(info->bus_info)); diff --git a/net/wireless/ibss.c 
b/net/wireless/ibss.c index 9b9551e..d80e471 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -37,7 +37,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } cfg80211_hold_bss(bss_from_pub(bss)); @@ -182,7 +182,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } wdev->current_bss = NULL; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index f9d6ce5..55957a2 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -44,6 +44,10 @@ #define MESH_SYNC_NEIGHBOR_OFFSET_MAX 50 +#define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units (=TUs) */ +#define MESH_DEFAULT_DTIM_PERIOD 2 +#define MESH_DEFAULT_AWAKE_WINDOW 10 /* in 1024 us units (=TUs) */ + const struct mesh_config default_mesh_config = { .dot11MeshRetryTimeout = MESH_RET_T, .dot11MeshConfirmTimeout = MESH_CONF_T, @@ -69,6 +73,8 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, + .power_mode = NL80211_MESH_POWER_ACTIVE, + .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, }; const struct mesh_setup default_mesh_setup = { @@ -79,6 +85,8 @@ const struct mesh_setup default_mesh_setup = { .ie = NULL, .ie_len = 0, .is_secure = false, + .beacon_interval = MESH_DEFAULT_BEACON_INTERVAL, + .dtim_period = MESH_DEFAULT_DTIM_PERIOD, }; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 5e8123e..caddca3 100644 --- a/net/wireless/mlme.c +++ 
b/net/wireless/mlme.c @@ -58,7 +58,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, */ if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && cfg80211_sme_failed_reassoc(wdev)) { - cfg80211_put_bss(bss); + cfg80211_put_bss(wiphy, bss); goto out; } @@ -70,7 +70,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, * do not call connect_result() now because the * sme will schedule work that does it later. */ - cfg80211_put_bss(bss); + cfg80211_put_bss(wiphy, bss); goto out; } @@ -108,7 +108,7 @@ void __cfg80211_send_deauth(struct net_device *dev, if (wdev->current_bss && ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; was_current = true; } @@ -164,7 +164,7 @@ void __cfg80211_send_disassoc(struct net_device *dev, ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { cfg80211_sme_disassoc(dev, wdev->current_bss); cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; } else WARN_ON(1); @@ -324,7 +324,7 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, err = rdev_auth(rdev, dev, &req); out: - cfg80211_put_bss(req.bss); + cfg80211_put_bss(&rdev->wiphy, req.bss); return err; } @@ -432,7 +432,7 @@ out: if (err) { if (was_connected) wdev->sme_state = CFG80211_SME_CONNECTED; - cfg80211_put_bss(req.bss); + cfg80211_put_bss(&rdev->wiphy, req.bss); } return err; @@ -514,7 +514,7 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, if (wdev->sme_state != CFG80211_SME_CONNECTED) return -ENOTCONN; - if (WARN_ON(!wdev->current_bss)) + if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state)) return -ENOTCONN; memset(&req, 0, sizeof(req)); @@ -572,7 +572,7 @@ void cfg80211_mlme_down(struct 
cfg80211_registered_device *rdev, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(&rdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; } } @@ -988,64 +988,122 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, } EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); -void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef) +void cfg80211_dfs_channels_update_work(struct work_struct *work) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct delayed_work *delayed_work; + struct cfg80211_registered_device *rdev; + struct cfg80211_chan_def chandef; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *c; + struct wiphy *wiphy; + bool check_again = false; + unsigned long timeout, next_time = 0; + int bandid, i; + + delayed_work = container_of(work, struct delayed_work, work); + rdev = container_of(delayed_work, struct cfg80211_registered_device, + dfs_update_channels_wk); + wiphy = &rdev->wiphy; + + mutex_lock(&cfg80211_mutex); + for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { + sband = wiphy->bands[bandid]; + if (!sband) + continue; - trace_cfg80211_ch_switch_notify(dev, chandef); + for (i = 0; i < sband->n_channels; i++) { + c = &sband->channels[i]; - wdev_lock(wdev); + if (c->dfs_state != NL80211_DFS_UNAVAILABLE) + continue; - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) - goto out; + timeout = c->dfs_state_entered + + IEEE80211_DFS_MIN_NOP_TIME_MS; - wdev->channel = chandef->chan; - nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); -out: - wdev_unlock(wdev); - return; + if (time_after_eq(jiffies, timeout)) { + c->dfs_state = NL80211_DFS_USABLE; + cfg80211_chandef_create(&chandef, c, + NL80211_CHAN_NO_HT); + + nl80211_radar_notify(rdev, 
&chandef, + NL80211_RADAR_NOP_FINISHED, + NULL, GFP_ATOMIC); + continue; + } + + if (!check_again) + next_time = timeout - jiffies; + else + next_time = min(next_time, timeout - jiffies); + check_again = true; + } + } + mutex_unlock(&cfg80211_mutex); + + /* reschedule if there are other channels waiting to be cleared again */ + if (check_again) + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + next_time); } -EXPORT_SYMBOL(cfg80211_ch_switch_notify); -bool cfg80211_rx_spurious_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) + +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + unsigned long timeout; - trace_cfg80211_rx_spurious_frame(dev, addr); + trace_cfg80211_radar_event(wiphy, chandef); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) { - trace_cfg80211_return_bool(false); - return false; - } - ret = nl80211_unexpected_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; + /* only set the chandef supplied channel to unavailable, in + * case the radar is detected on only one of multiple channels + * spanned by the chandef. 
+ */ + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); + + timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS); + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + timeout); + + nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); } -EXPORT_SYMBOL(cfg80211_rx_spurious_frame); +EXPORT_SYMBOL(cfg80211_radar_event); -bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) +void cfg80211_cac_event(struct net_device *netdev, + enum nl80211_radar_event event, gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; + struct wireless_dev *wdev = netdev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_chan_def chandef; + unsigned long timeout; - trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); + trace_cfg80211_cac_event(netdev, event); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO && - wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { - trace_cfg80211_return_bool(false); - return false; + if (WARN_ON(!wdev->cac_started)) + return; + + if (WARN_ON(!wdev->channel)) + return; + + cfg80211_chandef_create(&chandef, wdev->channel, NL80211_CHAN_NO_HT); + + switch (event) { + case NL80211_RADAR_CAC_FINISHED: + timeout = wdev->cac_start_time + + msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + WARN_ON(!time_after_eq(jiffies, timeout)); + cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_AVAILABLE); + break; + case NL80211_RADAR_CAC_ABORTED: + break; + default: + WARN_ON(1); + return; } - ret = nl80211_unexpected_4addr_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; + wdev->cac_started = false; + + nl80211_radar_notify(rdev, &chandef, event, netdev, gfp); } -EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); +EXPORT_SYMBOL(cfg80211_cac_event); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 
f45706a..58e13a8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -19,6 +19,7 @@ #include <net/genetlink.h> #include <net/cfg80211.h> #include <net/sock.h> +#include <net/inet_connection_sock.h> #include "core.h" #include "nl80211.h" #include "reg.h" @@ -365,6 +366,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, + [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, + [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, + [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, + [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, }; /* policy for the key attributes */ @@ -397,6 +402,26 @@ nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { [NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_RFKILL_RELEASE] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_TCP_CONNECTION] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = { + [NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_MAC] = { .len = ETH_ALEN }, + [NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_seq) + }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_token) + }, + [NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, }; /* policy for GTK rekey offload attributes */ @@ -1233,6 +1258,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, 
u32 portid, u32 seq, int flag dev->wiphy.wowlan.pattern_min_len, .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, + .max_pkt_offset = + dev->wiphy.wowlan.max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, sizeof(pat), &pat)) @@ -1265,6 +1292,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + dev->wiphy.max_acl_mac_addrs && + nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, + dev->wiphy.max_acl_mac_addrs)) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: @@ -1274,7 +1307,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; + int idx = 0, ret; int start = cb->args[0]; struct cfg80211_registered_device *dev; @@ -1284,9 +1317,29 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++idx <= start) continue; - if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev) < 0) { + ret = nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + dev); + if (ret < 0) { + /* + * If sending the wiphy data didn't fit (ENOBUFS or + * EMSGSIZE returned), this SKB is still empty (so + * it's not too big because another wiphy dataset is + * already in the skb) and we've not tried to adjust + * the dump allocation yet ... then adjust the alloc + * size to be bigger, and return 1 but with the empty + * skb. This results in an empty message being RX'ed + * in userspace, but that is ignored. + * + * We can then retry with the larger buffer. 
+ */ + if ((ret == -ENOBUFS || ret == -EMSGSIZE) && + !skb->len && + cb->min_dump_alloc < 4096) { + cb->min_dump_alloc = 4096; + mutex_unlock(&cfg80211_mutex); + return 1; + } idx--; break; } @@ -1303,7 +1356,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; struct cfg80211_registered_device *dev = info->user_ptr[0]; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -2079,6 +2132,13 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; + if (type == NL80211_IFTYPE_P2P_DEVICE && info->attrs[NL80211_ATTR_MAC]) { + nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], + ETH_ALEN); + if (!is_valid_ether_addr(params.macaddr)) + return -EADDRNOTAVAIL; + } + if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); @@ -2481,6 +2541,97 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) return err; } +/* This function returns an error or the number of nested attributes */ +static int validate_acl_mac_addrs(struct nlattr *nl_attr) +{ + struct nlattr *attr; + int n_entries = 0, tmp; + + nla_for_each_nested(attr, nl_attr, tmp) { + if (nla_len(attr) != ETH_ALEN) + return -EINVAL; + + n_entries++; + } + + return n_entries; +} + +/* + * This function parses ACL information and allocates memory for ACL data. + * On successful return, the calling function is responsible to free the + * ACL buffer returned by this function. 
+ */ +static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, + struct genl_info *info) +{ + enum nl80211_acl_policy acl_policy; + struct nlattr *attr; + struct cfg80211_acl_data *acl; + int i = 0, n_entries, tmp; + + if (!wiphy->max_acl_mac_addrs) + return ERR_PTR(-EOPNOTSUPP); + + if (!info->attrs[NL80211_ATTR_ACL_POLICY]) + return ERR_PTR(-EINVAL); + + acl_policy = nla_get_u32(info->attrs[NL80211_ATTR_ACL_POLICY]); + if (acl_policy != NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED && + acl_policy != NL80211_ACL_POLICY_DENY_UNLESS_LISTED) + return ERR_PTR(-EINVAL); + + if (!info->attrs[NL80211_ATTR_MAC_ADDRS]) + return ERR_PTR(-EINVAL); + + n_entries = validate_acl_mac_addrs(info->attrs[NL80211_ATTR_MAC_ADDRS]); + if (n_entries < 0) + return ERR_PTR(n_entries); + + if (n_entries > wiphy->max_acl_mac_addrs) + return ERR_PTR(-ENOTSUPP); + + acl = kzalloc(sizeof(*acl) + (sizeof(struct mac_address) * n_entries), + GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) { + memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN); + i++; + } + + acl->n_acl_entries = n_entries; + acl->acl_policy = acl_policy; + + return acl; +} + +static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_acl_data *acl; + int err; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + if (!dev->ieee80211_ptr->beacon_interval) + return -EINVAL; + + acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + err = rdev_set_mac_acl(rdev, dev, acl); + + kfree(acl); + + return err; +} + static int nl80211_parse_beacon(struct genl_info *info, struct cfg80211_beacon_data *bcn) { @@ -2598,6 +2749,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info 
*info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; + u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -2716,14 +2868,30 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) return -EINVAL; + err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); + if (err < 0) + return err; + if (err) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; + } + mutex_lock(&rdev->devlist_mtx); - err = cfg80211_can_use_chan(rdev, wdev, params.chandef.chan, - CHAN_MODE_SHARED); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + params.chandef.chan, + CHAN_MODE_SHARED, + radar_detect_width); mutex_unlock(&rdev->devlist_mtx); if (err) return err; + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { wdev->preset_chandef = params.chandef; @@ -2732,6 +2900,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); } + + kfree(params.acl); + return err; } @@ -2939,12 +3110,22 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, sinfo->inactive_time)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES) && + if ((sinfo->filled & (STATION_INFO_RX_BYTES | + STATION_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, - sinfo->rx_bytes)) + (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES) && + if ((sinfo->filled & (STATION_INFO_TX_BYTES | + NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, + (u32)sinfo->tx_bytes)) 
+ goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_RX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, + sinfo->rx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_TX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, sinfo->tx_bytes)) goto nla_put_failure; if ((sinfo->filled & STATION_INFO_LLID) && @@ -3001,6 +3182,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_LOCAL_PM) && + nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, + sinfo->local_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_PEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, + sinfo->peer_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_NONPEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, + sinfo->nonpeer_pm)) + goto nla_put_failure; if (sinfo->filled & STATION_INFO_BSS_PARAM) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) @@ -3160,6 +3353,54 @@ static struct net_device *get_vlan(struct genl_info *info, return ERR_PTR(ret); } +static struct nla_policy +nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = { + [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 }, + [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, +}; + +static int nl80211_set_station_tdls(struct genl_info *info, + struct station_parameters *params) +{ + struct nlattr *tb[NL80211_STA_WME_MAX + 1]; + struct nlattr *nla; + int err; + + /* Dummy STA entry gets updated once the peer capabilities are known */ + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params->ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) + params->vht_capa = + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + + /* parse WME attributes if present */ + if (!info->attrs[NL80211_ATTR_STA_WME]) + return 0; + + nla = 
info->attrs[NL80211_ATTR_STA_WME]; + err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, + nl80211_sta_wme_policy); + if (err) + return err; + + if (tb[NL80211_STA_WME_UAPSD_QUEUES]) + params->uapsd_queues = nla_get_u8( + tb[NL80211_STA_WME_UAPSD_QUEUES]); + if (params->uapsd_queues & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) + return -EINVAL; + + if (tb[NL80211_STA_WME_MAX_SP]) + params->max_sp = nla_get_u8(tb[NL80211_STA_WME_MAX_SP]); + + if (params->max_sp & ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK) + return -EINVAL; + + params->sta_modify_mask |= STATION_PARAM_APPLY_UAPSD; + + return 0; +} + static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -3188,13 +3429,21 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); } - if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) - params.listen_interval = - nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } - if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) - params.ht_capa = - nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } + + if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) + return -EINVAL; if (!rdev->ops->change_station) return -EOPNOTSUPP; @@ -3210,6 +3459,17 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_state = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { + enum nl80211_mesh_power_mode pm = nla_get_u32( + 
info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]); + + if (pm <= NL80211_MESH_POWER_UNKNOWN || + pm > NL80211_MESH_POWER_MAX) + return -EINVAL; + + params.local_pm = pm; + } + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: @@ -3217,6 +3477,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow mesh-specific things */ if (params.plink_action) return -EINVAL; + if (params.local_pm) + return -EINVAL; /* TDLS can't be set, ... */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) @@ -3231,11 +3493,32 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* accept only the listed bits */ if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP))) return -EINVAL; + /* but authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + + /* reject other things that can't change */ + if (params.supported_rates) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || + info->attrs[NL80211_ATTR_VHT_CAPABILITY]) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) @@ -3250,14 +3533,28 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) * to change the flag. 
*/ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - /* fall through */ + /* Include parameters for TDLS peer (driver will check) */ + err = nl80211_set_station_tdls(info, &params); + if (err) + return err; + /* disallow things sta doesn't support */ + if (params.plink_action) + return -EINVAL; + if (params.local_pm) + return -EINVAL; + /* reject any changes other than AUTHORIZED or WME (for TDLS) */ + if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_WME))) + return -EINVAL; + break; case NL80211_IFTYPE_ADHOC: /* disallow things sta doesn't support */ if (params.plink_action) return -EINVAL; - if (params.ht_capa) + if (params.local_pm) return -EINVAL; - if (params.listen_interval >= 0) + if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || + info->attrs[NL80211_ATTR_VHT_CAPABILITY]) return -EINVAL; /* reject any changes other than AUTHORIZED */ if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) @@ -3267,9 +3564,14 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow things mesh doesn't support */ if (params.vlan) return -EINVAL; - if (params.ht_capa) + if (params.supported_rates) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) return -EINVAL; - if (params.listen_interval >= 0) + if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || + info->attrs[NL80211_ATTR_VHT_CAPABILITY]) return -EINVAL; /* * No special handling for TDLS here -- the userspace @@ -3295,12 +3597,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return err; } -static struct nla_policy -nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = { - [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 }, - [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, -}; - static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@
-3335,6 +3631,19 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } + + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); @@ -3393,17 +3702,31 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + /* allow authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_MESH_POINT: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* TDLS peers cannot be added */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) return -EINVAL; break; case NL80211_IFTYPE_STATION: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) return -EINVAL; @@ -3787,12 +4110,8 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) * window between nl80211_init() and regulatory_init(), if that is * even 
possible. */ - mutex_lock(&cfg80211_mutex); - if (unlikely(!cfg80211_regdomain)) { - mutex_unlock(&cfg80211_mutex); + if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; - } - mutex_unlock(&cfg80211_mutex); if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) return -EINVAL; @@ -3908,7 +4227,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, cur_params.dot11MeshHWMProotInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, - cur_params.dot11MeshHWMPconfirmationInterval)) + cur_params.dot11MeshHWMPconfirmationInterval) || + nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, + cur_params.power_mode) || + nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, + cur_params.dot11MeshAwakeWindowDuration)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3947,6 +4270,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, + [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3967,13 +4292,15 @@ static int nl80211_parse_mesh_config(struct genl_info *info, struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; u32 mask = 0; -#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ -do {\ - if (table[attr_num]) {\ - cfg->param = nla_fn(table[attr_num]); \ - mask |= (1 << (attr_num - 1)); \ - } \ -} while (0);\ +#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \ +do { \ + if (tb[attr]) { \ + if (fn(tb[attr]) < min || fn(tb[attr]) > max) \ + return -EINVAL; \ + cfg->param = fn(tb[attr]); \ + mask |= (1 << (attr - 1)); \ + } \ +} while (0) if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) @@ -3988,83 
+4315,98 @@ do {\ BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); /* Fill in the params struct */ - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, 1, 255, mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, 1, 255, mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, 1, 255, mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, 0, 255, mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, 0, 16, mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, 1, 255, mask, NL80211_MESHCONF_TTL, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, 1, 255, mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, 0, 1, mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, mask, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, + 1, 255, mask, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, 0, 255, mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, 
path_refresh_time, 1, 65535, mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, 1, 65535, mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, mask, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, + 1, 65535, mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, - mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + 1, 65535, mask, + NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, - mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + 1, 65535, mask, + NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPnetDiameterTraversalTime, mask, + dot11MeshHWMPnetDiameterTraversalTime, + 1, 65535, mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask, - NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask, - NL80211_MESHCONF_HWMP_RANN_INTERVAL, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, 0, 4, + mask, NL80211_MESHCONF_HWMP_ROOTMODE, + nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, 1, 65535, + mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshGateAnnouncementProtocol, mask, - NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + dot11MeshGateAnnouncementProtocol, 0, 1, + mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, + 
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255, mask, NL80211_MESHCONF_RSSI_THRESHOLD, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, - mask, + 1, 65535, mask, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, 1, 65535, mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPconfirmationInterval, mask, + dot11MeshHWMPconfirmationInterval, + 1, 65535, mask, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_MAX, + mask, NL80211_MESHCONF_POWER_MODE, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, + 0, 65535, mask, + NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); if (mask_out) *mask_out = mask; @@ -4152,6 +4494,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) { + const struct ieee80211_regdomain *regdom; struct sk_buff *msg; void *hdr = NULL; struct nlattr *nl_reg_rules; @@ -4174,35 +4517,36 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) if (!hdr) goto put_failure; - if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, - cfg80211_regdomain->alpha2) || - (cfg80211_regdomain->dfs_region && - nla_put_u8(msg, NL80211_ATTR_DFS_REGION, - cfg80211_regdomain->dfs_region))) - goto nla_put_failure; - if (reg_last_request_cell_base() && nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, NL80211_USER_REG_HINT_CELL_BASE)) goto nla_put_failure; + rcu_read_lock(); + regdom = rcu_dereference(cfg80211_regdomain); + + if (nla_put_string(msg, 
NL80211_ATTR_REG_ALPHA2, regdom->alpha2) || + (regdom->dfs_region && + nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) + goto nla_put_failure_rcu; + nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) - goto nla_put_failure; + goto nla_put_failure_rcu; - for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { + for (i = 0; i < regdom->n_reg_rules; i++) { struct nlattr *nl_reg_rule; const struct ieee80211_reg_rule *reg_rule; const struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule; - reg_rule = &cfg80211_regdomain->reg_rules[i]; + reg_rule = &regdom->reg_rules[i]; freq_range = &reg_rule->freq_range; power_rule = &reg_rule->power_rule; nl_reg_rule = nla_nest_start(msg, i); if (!nl_reg_rule) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u32(msg, NL80211_ATTR_REG_RULE_FLAGS, reg_rule->flags) || @@ -4216,10 +4560,11 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) power_rule->max_antenna_gain) || nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP, power_rule->max_eirp)) - goto nla_put_failure; + goto nla_put_failure_rcu; nla_nest_end(msg, nl_reg_rule); } + rcu_read_unlock(); nla_nest_end(msg, nl_reg_rules); @@ -4227,6 +4572,8 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) err = genlmsg_reply(msg, info); goto out; +nla_put_failure_rcu: + rcu_read_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); put_failure: @@ -4259,27 +4606,18 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) dfs_region = nla_get_u8(info->attrs[NL80211_ATTR_DFS_REGION]); nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], - rem_reg_rules) { + rem_reg_rules) { num_rules++; if (num_rules > NL80211_MAX_SUPP_REG_RULES) return -EINVAL; } - mutex_lock(&cfg80211_mutex); - - if (!reg_is_valid_request(alpha2)) { - r = -EINVAL; - goto bad_reg; - } - size_of_regd = sizeof(struct ieee80211_regdomain) + - (num_rules * sizeof(struct
ieee80211_reg_rule)); + num_rules * sizeof(struct ieee80211_reg_rule); rd = kzalloc(size_of_regd, GFP_KERNEL); - if (!rd) { - r = -ENOMEM; - goto bad_reg; - } + if (!rd) + return -ENOMEM; rd->n_reg_rules = num_rules; rd->alpha2[0] = alpha2[0]; @@ -4293,10 +4631,10 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) rd->dfs_region = dfs_region; nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], - rem_reg_rules) { + rem_reg_rules) { nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, - nla_data(nl_reg_rule), nla_len(nl_reg_rule), - reg_rule_policy); + nla_data(nl_reg_rule), nla_len(nl_reg_rule), + reg_rule_policy); r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); if (r) goto bad_reg; @@ -4309,16 +4647,14 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - BUG_ON(rule_idx != num_rules); + mutex_lock(&cfg80211_mutex); r = set_regdom(rd); - + /* set_regdom took ownership */ + rd = NULL; mutex_unlock(&cfg80211_mutex); - return r; - bad_reg: - mutex_unlock(&cfg80211_mutex); kfree(rd); return r; } @@ -4366,14 +4702,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - if (rdev->scan_req) - return -EBUSY; + mutex_lock(&rdev->sched_scan_mtx); + if (rdev->scan_req) { + err = -EBUSY; + goto unlock; + } if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { n_channels = validate_scan_freqs( info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); - if (!n_channels) - return -EINVAL; + if (!n_channels) { + err = -EINVAL; + goto unlock; + } } else { enum ieee80211_band band; n_channels = 0; @@ -4387,23 +4728,29 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) n_ssids++; - if (n_ssids > wiphy->max_scan_ssids) - return -EINVAL; + if (n_ssids > wiphy->max_scan_ssids) { + err = -EINVAL; + goto unlock; + } if (info->attrs[NL80211_ATTR_IE]) ie_len = 
nla_len(info->attrs[NL80211_ATTR_IE]); else ie_len = 0; - if (ie_len > wiphy->max_scan_ie_len) - return -EINVAL; + if (ie_len > wiphy->max_scan_ie_len) { + err = -EINVAL; + goto unlock; + } request = kzalloc(sizeof(*request) + sizeof(*request->ssids) * n_ssids + sizeof(*request->channels) * n_channels + ie_len, GFP_KERNEL); - if (!request) - return -ENOMEM; + if (!request) { + err = -ENOMEM; + goto unlock; + } if (n_ssids) request->ssids = (void *)&request->channels[n_channels]; @@ -4540,6 +4887,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) kfree(request); } + unlock: + mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -4801,6 +5150,54 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, return err; } +static int nl80211_start_radar_detection(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_chan_def chandef; + int err; + + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + + if (wdev->cac_started) + return -EBUSY; + + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + if (err < 0) + return err; + + if (err == 0) + return -EINVAL; + + if (chandef.chan->dfs_state != NL80211_DFS_USABLE) + return -EINVAL; + + if (!rdev->ops->start_radar_detection) + return -EOPNOTSUPP; + + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + chandef.chan, CHAN_MODE_SHARED, + BIT(chandef.width)); + if (err) + goto err_locked; + + err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); + if (!err) { + wdev->channel = chandef.chan; + wdev->cac_started = true; + wdev->cac_start_time = jiffies; + } +err_locked: + mutex_unlock(&rdev->devlist_mtx); + + return err; +} + static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, u32 seq, int flags, struct 
cfg80211_registered_device *rdev, @@ -4811,6 +5208,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, const struct cfg80211_bss_ies *ies; void *hdr; struct nlattr *bss; + bool tsf = false; ASSERT_WDEV_LOCK(wdev); @@ -4834,22 +5232,24 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, rcu_read_lock(); ies = rcu_dereference(res->ies); - if (ies && ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS, - ies->len, ies->data)) { - rcu_read_unlock(); - goto nla_put_failure; + if (ies) { + if (nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf)) + goto fail_unlock_rcu; + tsf = true; + if (ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS, + ies->len, ies->data)) + goto fail_unlock_rcu; } ies = rcu_dereference(res->beacon_ies); - if (ies && ies->len && nla_put(msg, NL80211_BSS_BEACON_IES, - ies->len, ies->data)) { - rcu_read_unlock(); - goto nla_put_failure; + if (ies) { + if (!tsf && nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf)) + goto fail_unlock_rcu; + if (ies->len && nla_put(msg, NL80211_BSS_BEACON_IES, + ies->len, ies->data)) + goto fail_unlock_rcu; } rcu_read_unlock(); - if (res->tsf && - nla_put_u64(msg, NL80211_BSS_TSF, res->tsf)) - goto nla_put_failure; if (res->beacon_interval && nla_put_u16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval)) goto nla_put_failure; @@ -4894,6 +5294,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, return genlmsg_end(msg, hdr); + fail_unlock_rcu: + rcu_read_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -5867,6 +6269,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + if (info->attrs[NL80211_ATTR_USE_MFP]) { + connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp != NL80211_MFP_REQUIRED && + connect.mfp != NL80211_MFP_NO) + return -EINVAL; + } else { + connect.mfp = NL80211_MFP_NO; + } + if 
(info->attrs[NL80211_ATTR_WIPHY_FREQ]) { connect.channel = ieee80211_get_channel(wiphy, @@ -6652,6 +7063,21 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) return -EINVAL; + if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { + setup.beacon_interval = + nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); + if (setup.beacon_interval < 10 || + setup.beacon_interval > 10000) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) { + setup.dtim_period = + nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); + if (setup.dtim_period < 1 || setup.dtim_period > 100) + return -EINVAL; + } + if (info->attrs[NL80211_ATTR_MESH_SETUP]) { /* parse additional setup parameters if given */ err = nl80211_parse_mesh_setup(info, &setup); @@ -6680,16 +7106,100 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) } #ifdef CONFIG_PM +static int nl80211_send_wowlan_patterns(struct sk_buff *msg, + struct cfg80211_registered_device *rdev) +{ + struct nlattr *nl_pats, *nl_pat; + int i, pat_len; + + if (!rdev->wowlan->n_patterns) + return 0; + + nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); + if (!nl_pats) + return -ENOBUFS; + + for (i = 0; i < rdev->wowlan->n_patterns; i++) { + nl_pat = nla_nest_start(msg, i + 1); + if (!nl_pat) + return -ENOBUFS; + pat_len = rdev->wowlan->patterns[i].pattern_len; + if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, + DIV_ROUND_UP(pat_len, 8), + rdev->wowlan->patterns[i].mask) || + nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, + pat_len, rdev->wowlan->patterns[i].pattern) || + nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, + rdev->wowlan->patterns[i].pkt_offset)) + return -ENOBUFS; + nla_nest_end(msg, nl_pat); + } + nla_nest_end(msg, nl_pats); + + return 0; +} + +static int nl80211_send_wowlan_tcp(struct sk_buff *msg, + struct cfg80211_wowlan_tcp *tcp) +{ + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = 
nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || + nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || + nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->payload_len, tcp->payload) || + nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_len, tcp->wake_data) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_MASK, + DIV_ROUND_UP(tcp->wake_len, 8), tcp->wake_mask)) + return -ENOBUFS; + + if (tcp->payload_seq.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + sizeof(tcp->payload_seq), &tcp->payload_seq)) + return -ENOBUFS; + + if (tcp->payload_tok.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(tcp->payload_tok) + tcp->tokens_size, + &tcp->payload_tok)) + return -ENOBUFS; + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct sk_buff *msg; void *hdr; + u32 size = NLMSG_DEFAULT_SIZE; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (rdev->wowlan && rdev->wowlan->tcp) { + /* adjust size to have room for all the data */ + size += rdev->wowlan->tcp->tokens_size + + rdev->wowlan->tcp->payload_len + + rdev->wowlan->tcp->wake_len + + rdev->wowlan->tcp->wake_len / 8; + } + + msg = nlmsg_new(size, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -6720,31 +7230,12 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) 
(rdev->wowlan->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; - if (rdev->wowlan->n_patterns) { - struct nlattr *nl_pats, *nl_pat; - int i, pat_len; - nl_pats = nla_nest_start(msg, - NL80211_WOWLAN_TRIG_PKT_PATTERN); - if (!nl_pats) - goto nla_put_failure; + if (nl80211_send_wowlan_patterns(msg, rdev)) + goto nla_put_failure; - for (i = 0; i < rdev->wowlan->n_patterns; i++) { - nl_pat = nla_nest_start(msg, i + 1); - if (!nl_pat) - goto nla_put_failure; - pat_len = rdev->wowlan->patterns[i].pattern_len; - if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, - DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || - nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, - rdev->wowlan->patterns[i].pattern)) - goto nla_put_failure; - nla_nest_end(msg, nl_pat); - } - nla_nest_end(msg, nl_pats); - } + if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + goto nla_put_failure; nla_nest_end(msg, nl_wowlan); } @@ -6757,6 +7248,150 @@ nla_put_failure: return -ENOBUFS; } +static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, + struct nlattr *attr, + struct cfg80211_wowlan *trig) +{ + struct nlattr *tb[NUM_NL80211_WOWLAN_TCP]; + struct cfg80211_wowlan_tcp *cfg; + struct nl80211_wowlan_tcp_data_token *tok = NULL; + struct nl80211_wowlan_tcp_data_seq *seq = NULL; + u32 size; + u32 data_size, wake_size, tokens_size = 0, wake_mask_size; + int err, port; + + if (!rdev->wiphy.wowlan.tcp) + return -EINVAL; + + err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, + nla_data(attr), nla_len(attr), + nl80211_wowlan_tcp_policy); + if (err) + return err; + + if (!tb[NL80211_WOWLAN_TCP_SRC_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_MAC] || + !tb[NL80211_WOWLAN_TCP_DST_PORT] || + !tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_DATA_INTERVAL] || + !tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_WAKE_MASK]) + return -EINVAL; + + data_size = 
nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); + if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max) + return -EINVAL; + + if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > + rdev->wiphy.wowlan.tcp->data_interval_max) + return -EINVAL; + + wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); + if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max) + return -EINVAL; + + wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); + if (wake_mask_size != DIV_ROUND_UP(wake_size, 8)) + return -EINVAL; + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]) { + u32 tokln = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + + tok = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + tokens_size = tokln - sizeof(*tok); + + if (!tok->len || tokens_size % tok->len) + return -EINVAL; + if (!rdev->wiphy.wowlan.tcp->tok) + return -EINVAL; + if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len) + return -EINVAL; + if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len) + return -EINVAL; + if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize) + return -EINVAL; + if (tok->offset + tok->len > data_size) + return -EINVAL; + } + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { + seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); + if (!rdev->wiphy.wowlan.tcp->seq) + return -EINVAL; + if (seq->len == 0 || seq->len > 4) + return -EINVAL; + if (seq->len + seq->offset > data_size) + return -EINVAL; + } + + size = sizeof(*cfg); + size += data_size; + size += wake_size + wake_mask_size; + size += tokens_size; + + cfg = kzalloc(size, GFP_KERNEL); + if (!cfg) + return -ENOMEM; + cfg->src = nla_get_be32(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); + cfg->dst = nla_get_be32(tb[NL80211_WOWLAN_TCP_DST_IPV4]); + memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), + ETH_ALEN); + if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) + port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]); + else + port = 0; +#ifdef CONFIG_INET + /* allocate a socket and port for it and use it */ + err = 
__sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM, + IPPROTO_TCP, &cfg->sock, 1); + if (err) { + kfree(cfg); + return err; + } + if (inet_csk_get_port(cfg->sock->sk, port)) { + sock_release(cfg->sock); + kfree(cfg); + return -EADDRINUSE; + } + cfg->src_port = inet_sk(cfg->sock->sk)->inet_num; +#else + if (!port) { + kfree(cfg); + return -EINVAL; + } + cfg->src_port = port; +#endif + + cfg->dst_port = nla_get_u16(tb[NL80211_WOWLAN_TCP_DST_PORT]); + cfg->payload_len = data_size; + cfg->payload = (u8 *)cfg + sizeof(*cfg) + tokens_size; + memcpy((void *)cfg->payload, + nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]), + data_size); + if (seq) + cfg->payload_seq = *seq; + cfg->data_interval = nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]); + cfg->wake_len = wake_size; + cfg->wake_data = (u8 *)cfg + sizeof(*cfg) + tokens_size + data_size; + memcpy((void *)cfg->wake_data, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]), + wake_size); + cfg->wake_mask = (u8 *)cfg + sizeof(*cfg) + tokens_size + + data_size + wake_size; + memcpy((void *)cfg->wake_mask, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_MASK]), + wake_mask_size); + if (tok) { + cfg->tokens_size = tokens_size; + memcpy(&cfg->payload_tok, tok, sizeof(*tok) + tokens_size); + } + + trig->tcp = cfg; + + return 0; +} + static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6767,7 +7402,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) int err, i; bool prev_enabled = rdev->wowlan; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { @@ -6831,7 +7467,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { struct nlattr *pat; int n_patterns = 0; - int rem, 
pat_len, mask_len; + int rem, pat_len, mask_len, pkt_offset; struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT]; nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], @@ -6866,6 +7502,15 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) pat_len < wowlan->pattern_min_len) goto error; + if (!pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]) + pkt_offset = 0; + else + pkt_offset = nla_get_u32( + pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]); + if (pkt_offset > wowlan->max_pkt_offset) + goto error; + new_triggers.patterns[i].pkt_offset = pkt_offset; + new_triggers.patterns[i].mask = kmalloc(mask_len + pat_len, GFP_KERNEL); if (!new_triggers.patterns[i].mask) { @@ -6885,6 +7530,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } } + if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) { + err = nl80211_parse_wowlan_tcp( + rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION], + &new_triggers); + if (err) + goto error; + } + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; @@ -6902,6 +7555,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < new_triggers.n_patterns; i++) kfree(new_triggers.patterns[i].mask); kfree(new_triggers.patterns); + if (new_triggers.tcp && new_triggers.tcp->sock) + sock_release(new_triggers.tcp->sock); + kfree(new_triggers.tcp); return err; } #endif @@ -7106,20 +7762,9 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; - if (!wdev->p2p_started) - return 0; - - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - - if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + 
mutex_unlock(&rdev->sched_scan_mtx); return 0; } @@ -7784,6 +8429,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MAC_ACL, + .doit = nl80211_set_mac_acl, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_RADAR_DETECT, + .doit = nl80211_start_radar_detection, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -7827,7 +8488,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (WARN_ON(!req)) return 0; @@ -8051,7 +8712,7 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } - if (wiphy_idx_valid(request->wiphy_idx) && + if (request->wiphy_idx != WIPHY_IDX_INVALID && nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) goto nla_put_failure; @@ -8981,6 +9642,57 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, } void +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_RADAR_DETECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + /* NOP and radar events don't need a netdev parameter */ + if (netdev) { + struct wireless_dev *wdev = netdev->ieee80211_ptr; + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto 
nla_put_failure; + } + + if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event)) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, u32 num_packets, gfp_t gfp) @@ -9115,6 +9827,114 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_report_obss_beacon); +#ifdef CONFIG_PM +void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err, size = 200; + + trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup); + + if (wakeup) + size += wakeup->packet_present_len; + + msg = nlmsg_new(size, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_WOWLAN); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto free_msg; + + if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) + goto free_msg; + + if (wakeup) { + struct nlattr *reasons; + + reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + + if (wakeup->disconnect && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) + goto free_msg; + if (wakeup->magic_pkt && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) + goto free_msg; + if (wakeup->gtk_rekey_failure && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) + goto free_msg; + if (wakeup->eap_identity_req && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) + goto free_msg; + 
if (wakeup->four_way_handshake && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) + goto free_msg; + if (wakeup->rfkill_release && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)) + goto free_msg; + + if (wakeup->pattern_idx >= 0 && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + wakeup->pattern_idx)) + goto free_msg; + + if (wakeup->tcp_match) + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + + if (wakeup->tcp_connlost) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + + if (wakeup->tcp_nomoretokens) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + + if (wakeup->packet) { + u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; + u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; + + if (!wakeup->packet_80211) { + pkt_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023; + len_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN; + } + + if (wakeup->packet_len && + nla_put_u32(msg, len_attr, wakeup->packet_len)) + goto free_msg; + + if (nla_put(msg, pkt_attr, wakeup->packet_present_len, + wakeup->packet)) + goto free_msg; + } + + nla_nest_end(msg, reasons); + } + + err = genlmsg_end(msg, hdr); + if (err < 0) + goto free_msg; + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + free_msg: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_report_wowlan_wakeup); +#endif + void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code, gfp_t gfp) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2acba84..b061da4 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -108,6 +108,13 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); + +void +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event 
event, + struct net_device *netdev, gfp_t gfp); + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 6c0c819..422d382 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -875,4 +875,16 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } + +static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_acl_data *params) +{ + int ret; + + trace_rdev_set_mac_acl(&rdev->wiphy, dev, params); + ret = rdev->ops->set_mac_acl(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6e53089..98532c00 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -48,7 +48,6 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/list.h> -#include <linux/random.h> #include <linux/ctype.h> #include <linux/nl80211.h> #include <linux/platform_device.h> @@ -66,6 +65,13 @@ #define REG_DBG_PRINT(args...) 
#endif +enum reg_request_treatment { + REG_REQ_OK, + REG_REQ_IGNORE, + REG_REQ_INTERSECT, + REG_REQ_ALREADY_SET, +}; + static struct regulatory_request core_request_world = { .initiator = NL80211_REGDOM_SET_BY_CORE, .alpha2[0] = '0', @@ -76,7 +82,8 @@ static struct regulatory_request core_request_world = { }; /* Receipt of information from last regulatory request */ -static struct regulatory_request *last_request = &core_request_world; +static struct regulatory_request __rcu *last_request = + (void __rcu *)&core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -88,16 +95,16 @@ static struct device_type reg_device_type = { /* * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no - * information to give us an alpha2 + * information to give us an alpha2. */ -const struct ieee80211_regdomain *cfg80211_regdomain; +const struct ieee80211_regdomain __rcu *cfg80211_regdomain; /* * Protects static reg.c components: - * - cfg80211_world_regdom - * - cfg80211_regdom - * - last_request - * - reg_num_devs_support_basehint + * - cfg80211_regdomain (if not used with RCU) + * - cfg80211_world_regdom + * - last_request (if not used with RCU) + * - reg_num_devs_support_basehint */ static DEFINE_MUTEX(reg_mutex); @@ -112,6 +119,31 @@ static inline void assert_reg_lock(void) lockdep_assert_held(&reg_mutex); } +static const struct ieee80211_regdomain *get_cfg80211_regdom(void) +{ + return rcu_dereference_protected(cfg80211_regdomain, + lockdep_is_held(&reg_mutex)); +} + +static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) +{ + return rcu_dereference_protected(wiphy->regd, + lockdep_is_held(&reg_mutex)); +} + +static void rcu_free_regdom(const struct ieee80211_regdomain *r) +{ + if (!r) + return; + kfree_rcu((struct ieee80211_regdomain *)r, rcu_head); +} + +static struct regulatory_request *get_last_request(void) +{ + return 
rcu_dereference_check(last_request, + lockdep_is_held(&reg_mutex)); +} + /* Used to queue up regulatory hints */ static LIST_HEAD(reg_requests_list); static spinlock_t reg_requests_lock; @@ -177,28 +209,37 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void reset_regdomains(bool full_reset) +static void reset_regdomains(bool full_reset, + const struct ieee80211_regdomain *new_regdom) { + const struct ieee80211_regdomain *r; + struct regulatory_request *lr; + + assert_reg_lock(); + + r = get_cfg80211_regdom(); + /* avoid freeing static information or freeing something twice */ - if (cfg80211_regdomain == cfg80211_world_regdom) - cfg80211_regdomain = NULL; + if (r == cfg80211_world_regdom) + r = NULL; if (cfg80211_world_regdom == &world_regdom) cfg80211_world_regdom = NULL; - if (cfg80211_regdomain == &world_regdom) - cfg80211_regdomain = NULL; + if (r == &world_regdom) + r = NULL; - kfree(cfg80211_regdomain); - kfree(cfg80211_world_regdom); + rcu_free_regdom(r); + rcu_free_regdom(cfg80211_world_regdom); cfg80211_world_regdom = &world_regdom; - cfg80211_regdomain = NULL; + rcu_assign_pointer(cfg80211_regdomain, new_regdom); if (!full_reset) return; - if (last_request != &core_request_world) - kfree(last_request); - last_request = &core_request_world; + lr = get_last_request(); + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); + rcu_assign_pointer(last_request, &core_request_world); } /* @@ -207,30 +248,29 @@ static void reset_regdomains(bool full_reset) */ static void update_world_regdomain(const struct ieee80211_regdomain *rd) { - BUG_ON(!last_request); + struct regulatory_request *lr; - reset_regdomains(false); + lr = get_last_request(); + + WARN_ON(!lr); + + reset_regdomains(false, rd); cfg80211_world_regdom = rd; - cfg80211_regdomain = rd; } bool is_world_regdom(const char *alpha2) { if (!alpha2) return false; - if (alpha2[0] == '0' && 
alpha2[1] == '0') - return true; - return false; + return alpha2[0] == '0' && alpha2[1] == '0'; } static bool is_alpha2_set(const char *alpha2) { if (!alpha2) return false; - if (alpha2[0] != 0 && alpha2[1] != 0) - return true; - return false; + return alpha2[0] && alpha2[1]; } static bool is_unknown_alpha2(const char *alpha2) @@ -241,9 +281,7 @@ static bool is_unknown_alpha2(const char *alpha2) * Special case where regulatory domain was built by driver * but a specific alpha2 cannot be determined */ - if (alpha2[0] == '9' && alpha2[1] == '9') - return true; - return false; + return alpha2[0] == '9' && alpha2[1] == '9'; } static bool is_intersected_alpha2(const char *alpha2) @@ -255,39 +293,30 @@ static bool is_intersected_alpha2(const char *alpha2) * result of an intersection between two regulatory domain * structures */ - if (alpha2[0] == '9' && alpha2[1] == '8') - return true; - return false; + return alpha2[0] == '9' && alpha2[1] == '8'; } static bool is_an_alpha2(const char *alpha2) { if (!alpha2) return false; - if (isalpha(alpha2[0]) && isalpha(alpha2[1])) - return true; - return false; + return isalpha(alpha2[0]) && isalpha(alpha2[1]); } static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) { if (!alpha2_x || !alpha2_y) return false; - if (alpha2_x[0] == alpha2_y[0] && - alpha2_x[1] == alpha2_y[1]) - return true; - return false; + return alpha2_x[0] == alpha2_y[0] && alpha2_x[1] == alpha2_y[1]; } static bool regdom_changes(const char *alpha2) { - assert_cfg80211_lock(); + const struct ieee80211_regdomain *r = get_cfg80211_regdom(); - if (!cfg80211_regdomain) + if (!r) return true; - if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) - return false; - return true; + return !alpha2_equal(r->alpha2, alpha2); } /* @@ -301,38 +330,36 @@ static bool is_user_regdom_saved(void) return false; /* This would indicate a mistake on the design */ - if (WARN((!is_world_regdom(user_alpha2) && - !is_an_alpha2(user_alpha2)), + if 
(WARN(!is_world_regdom(user_alpha2) && !is_an_alpha2(user_alpha2), "Unexpected user alpha2: %c%c\n", - user_alpha2[0], - user_alpha2[1])) + user_alpha2[0], user_alpha2[1])) return false; return true; } -static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, - const struct ieee80211_regdomain *src_regd) +static const struct ieee80211_regdomain * +reg_copy_regd(const struct ieee80211_regdomain *src_regd) { struct ieee80211_regdomain *regd; - int size_of_regd = 0; + int size_of_regd; unsigned int i; - size_of_regd = sizeof(struct ieee80211_regdomain) + - ((src_regd->n_reg_rules + 1) * sizeof(struct ieee80211_reg_rule)); + size_of_regd = + sizeof(struct ieee80211_regdomain) + + src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule); regd = kzalloc(size_of_regd, GFP_KERNEL); if (!regd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain)); for (i = 0; i < src_regd->n_reg_rules; i++) memcpy(&regd->reg_rules[i], &src_regd->reg_rules[i], - sizeof(struct ieee80211_reg_rule)); + sizeof(struct ieee80211_reg_rule)); - *dst_regd = regd; - return 0; + return regd; } #ifdef CONFIG_CFG80211_INTERNAL_REGDB @@ -347,9 +374,8 @@ static DEFINE_MUTEX(reg_regdb_search_mutex); static void reg_regdb_search(struct work_struct *work) { struct reg_regdb_search_request *request; - const struct ieee80211_regdomain *curdom, *regdom; - int i, r; - bool set_reg = false; + const struct ieee80211_regdomain *curdom, *regdom = NULL; + int i; mutex_lock(&cfg80211_mutex); @@ -360,14 +386,11 @@ static void reg_regdb_search(struct work_struct *work) list); list_del(&request->list); - for (i=0; i<reg_regdb_size; i++) { + for (i = 0; i < reg_regdb_size; i++) { curdom = reg_regdb[i]; - if (!memcmp(request->alpha2, curdom->alpha2, 2)) { - r = reg_copy_regd(&regdom, curdom); - if (r) - break; - set_reg = true; + if (alpha2_equal(request->alpha2, curdom->alpha2)) { + regdom = reg_copy_regd(curdom); break; } } @@ -376,7 +399,7 @@ static void 
reg_regdb_search(struct work_struct *work) } mutex_unlock(&reg_regdb_search_mutex); - if (set_reg) + if (!IS_ERR_OR_NULL(regdom)) set_regdom(regdom); mutex_unlock(&cfg80211_mutex); @@ -434,15 +457,14 @@ static int call_crda(const char *alpha2) return kobject_uevent(&reg_pdev->dev.kobj, KOBJ_CHANGE); } -/* Used by nl80211 before kmalloc'ing our regulatory domain */ -bool reg_is_valid_request(const char *alpha2) +static bool reg_is_valid_request(const char *alpha2) { - assert_cfg80211_lock(); + struct regulatory_request *lr = get_last_request(); - if (!last_request) + if (!lr || lr->processed) return false; - return alpha2_equal(last_request->alpha2, alpha2); + return alpha2_equal(lr->alpha2, alpha2); } /* Sanity check on a regulatory rule */ @@ -460,7 +482,7 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; if (freq_range->end_freq_khz <= freq_range->start_freq_khz || - freq_range->max_bandwidth_khz > freq_diff) + freq_range->max_bandwidth_khz > freq_diff) return false; return true; @@ -487,8 +509,7 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) } static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, - u32 center_freq_khz, - u32 bw_khz) + u32 center_freq_khz, u32 bw_khz) { u32 start_freq_khz, end_freq_khz; @@ -518,7 +539,7 @@ static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, * regulatory rule support for other "bands". 
**/ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, - u32 freq_khz) + u32 freq_khz) { #define ONE_GHZ_IN_KHZ 1000000 /* @@ -540,10 +561,9 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, * Helper for regdom_intersect(), this does the real * mathematical intersection fun */ -static int reg_rules_intersect( - const struct ieee80211_reg_rule *rule1, - const struct ieee80211_reg_rule *rule2, - struct ieee80211_reg_rule *intersected_rule) +static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1, + const struct ieee80211_reg_rule *rule2, + struct ieee80211_reg_rule *intersected_rule) { const struct ieee80211_freq_range *freq_range1, *freq_range2; struct ieee80211_freq_range *freq_range; @@ -560,11 +580,11 @@ static int reg_rules_intersect( power_rule = &intersected_rule->power_rule; freq_range->start_freq_khz = max(freq_range1->start_freq_khz, - freq_range2->start_freq_khz); + freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, - freq_range2->end_freq_khz); + freq_range2->end_freq_khz); freq_range->max_bandwidth_khz = min(freq_range1->max_bandwidth_khz, - freq_range2->max_bandwidth_khz); + freq_range2->max_bandwidth_khz); freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; if (freq_range->max_bandwidth_khz > freq_diff) @@ -575,7 +595,7 @@ static int reg_rules_intersect( power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain, power_rule2->max_antenna_gain); - intersected_rule->flags = (rule1->flags | rule2->flags); + intersected_rule->flags = rule1->flags | rule2->flags; if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; @@ -596,9 +616,9 @@ static int reg_rules_intersect( * resulting intersection of rules between rd1 and rd2. We will * kzalloc() this structure for you. 
*/ -static struct ieee80211_regdomain *regdom_intersect( - const struct ieee80211_regdomain *rd1, - const struct ieee80211_regdomain *rd2) +static struct ieee80211_regdomain * +regdom_intersect(const struct ieee80211_regdomain *rd1, + const struct ieee80211_regdomain *rd2) { int r, size_of_regd; unsigned int x, y; @@ -607,12 +627,7 @@ static struct ieee80211_regdomain *regdom_intersect( struct ieee80211_reg_rule *intersected_rule; struct ieee80211_regdomain *rd; /* This is just a dummy holder to help us count */ - struct ieee80211_reg_rule irule; - - /* Uses the stack temporarily for counter arithmetic */ - intersected_rule = &irule; - - memset(intersected_rule, 0, sizeof(struct ieee80211_reg_rule)); + struct ieee80211_reg_rule dummy_rule; if (!rd1 || !rd2) return NULL; @@ -629,11 +644,8 @@ static struct ieee80211_regdomain *regdom_intersect( rule1 = &rd1->reg_rules[x]; for (y = 0; y < rd2->n_reg_rules; y++) { rule2 = &rd2->reg_rules[y]; - if (!reg_rules_intersect(rule1, rule2, - intersected_rule)) + if (!reg_rules_intersect(rule1, rule2, &dummy_rule)) num_rules++; - memset(intersected_rule, 0, - sizeof(struct ieee80211_reg_rule)); } } @@ -641,15 +653,15 @@ static struct ieee80211_regdomain *regdom_intersect( return NULL; size_of_regd = sizeof(struct ieee80211_regdomain) + - ((num_rules + 1) * sizeof(struct ieee80211_reg_rule)); + num_rules * sizeof(struct ieee80211_reg_rule); rd = kzalloc(size_of_regd, GFP_KERNEL); if (!rd) return NULL; - for (x = 0; x < rd1->n_reg_rules; x++) { + for (x = 0; x < rd1->n_reg_rules && rule_idx < num_rules; x++) { rule1 = &rd1->reg_rules[x]; - for (y = 0; y < rd2->n_reg_rules; y++) { + for (y = 0; y < rd2->n_reg_rules && rule_idx < num_rules; y++) { rule2 = &rd2->reg_rules[y]; /* * This time around instead of using the stack lets @@ -657,8 +669,7 @@ static struct ieee80211_regdomain *regdom_intersect( * a memcpy() */ intersected_rule = &rd->reg_rules[rule_idx]; - r = reg_rules_intersect(rule1, rule2, - intersected_rule); + r = 
reg_rules_intersect(rule1, rule2, intersected_rule); /* * No need to memset here the intersected rule here as * we're not using the stack anymore @@ -699,34 +710,16 @@ static u32 map_regdom_flags(u32 rd_flags) return channel_flags; } -static int freq_reg_info_regd(struct wiphy *wiphy, - u32 center_freq, - u32 desired_bw_khz, - const struct ieee80211_reg_rule **reg_rule, - const struct ieee80211_regdomain *custom_regd) +static const struct ieee80211_reg_rule * +freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, + const struct ieee80211_regdomain *regd) { int i; bool band_rule_found = false; - const struct ieee80211_regdomain *regd; bool bw_fits = false; - if (!desired_bw_khz) - desired_bw_khz = MHZ_TO_KHZ(20); - - regd = custom_regd ? custom_regd : cfg80211_regdomain; - - /* - * Follow the driver's regulatory domain, if present, unless a country - * IE has been processed or a user wants to help complaince further - */ - if (!custom_regd && - last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - last_request->initiator != NL80211_REGDOM_SET_BY_USER && - wiphy->regd) - regd = wiphy->regd; - if (!regd) - return -EINVAL; + return ERR_PTR(-EINVAL); for (i = 0; i < regd->n_reg_rules; i++) { const struct ieee80211_reg_rule *rr; @@ -743,33 +736,36 @@ static int freq_reg_info_regd(struct wiphy *wiphy, if (!band_rule_found) band_rule_found = freq_in_rule_band(fr, center_freq); - bw_fits = reg_does_bw_fit(fr, - center_freq, - desired_bw_khz); + bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20)); - if (band_rule_found && bw_fits) { - *reg_rule = rr; - return 0; - } + if (band_rule_found && bw_fits) + return rr; } if (!band_rule_found) - return -ERANGE; + return ERR_PTR(-ERANGE); - return -EINVAL; + return ERR_PTR(-EINVAL); } -int freq_reg_info(struct wiphy *wiphy, - u32 center_freq, - u32 desired_bw_khz, - const struct ieee80211_reg_rule **reg_rule) +const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy, + u32 center_freq) { - 
assert_cfg80211_lock(); - return freq_reg_info_regd(wiphy, - center_freq, - desired_bw_khz, - reg_rule, - NULL); + const struct ieee80211_regdomain *regd; + struct regulatory_request *lr = get_last_request(); + + /* + * Follow the driver's regulatory domain, if present, unless a country + * IE has been processed or a user wants to help complaince further + */ + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + lr->initiator != NL80211_REGDOM_SET_BY_USER && + wiphy->regd) + regd = get_wiphy_regdom(wiphy); + else + regd = get_cfg80211_regdom(); + + return freq_reg_info_regd(wiphy, center_freq, regd); } EXPORT_SYMBOL(freq_reg_info); @@ -792,7 +788,6 @@ static const char *reg_initiator_name(enum nl80211_reg_initiator initiator) } static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, - u32 desired_bw_khz, const struct ieee80211_reg_rule *reg_rule) { const struct ieee80211_power_rule *power_rule; @@ -807,21 +802,16 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, else snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain); - REG_DBG_PRINT("Updating information on frequency %d MHz " - "for a %d MHz width channel with regulatory rule:\n", - chan->center_freq, - KHZ_TO_MHZ(desired_bw_khz)); + REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n", + chan->center_freq); REG_DBG_PRINT("%d KHz - %d KHz @ %d KHz), (%s mBi, %d mBm)\n", - freq_range->start_freq_khz, - freq_range->end_freq_khz, - freq_range->max_bandwidth_khz, - max_antenna_gain, + freq_range->start_freq_khz, freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, max_antenna_gain, power_rule->max_eirp); } #else static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, - u32 desired_bw_khz, const struct ieee80211_reg_rule *reg_rule) { return; @@ -831,43 +821,25 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, /* * Note that right now we assume the desired channel bandwidth * is always 20 MHz for 
each individual channel (HT40 uses 20 MHz - * per channel, the primary and the extension channel). To support - * smaller custom bandwidths such as 5 MHz or 10 MHz we'll need a - * new ieee80211_channel.target_bw and re run the regulatory check - * on the wiphy with the target_bw specified. Then we can simply use - * that below for the desired_bw_khz below. + * per channel, the primary and the extension channel). */ static void handle_channel(struct wiphy *wiphy, enum nl80211_reg_initiator initiator, - enum ieee80211_band band, - unsigned int chan_idx) + struct ieee80211_channel *chan) { - int r; u32 flags, bw_flags = 0; - u32 desired_bw_khz = MHZ_TO_KHZ(20); const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; const struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; struct wiphy *request_wiphy = NULL; + struct regulatory_request *lr = get_last_request(); - assert_cfg80211_lock(); - - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - chan = &sband->channels[chan_idx]; + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); flags = chan->orig_flags; - r = freq_reg_info(wiphy, - MHZ_TO_KHZ(chan->center_freq), - desired_bw_khz, - &reg_rule); - - if (r) { + reg_rule = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq)); + if (IS_ERR(reg_rule)) { /* * We will disable all channels that do not match our * received regulatory rule unless the hint is coming @@ -879,7 +851,7 @@ static void handle_channel(struct wiphy *wiphy, * while 5 GHz is still supported. 
*/ if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && - r == -ERANGE) + PTR_ERR(reg_rule) == -ERANGE) return; REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq); @@ -887,15 +859,19 @@ static void handle_channel(struct wiphy *wiphy, return; } - chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + chan_reg_rule_print_dbg(chan, reg_rule); power_rule = &reg_rule->power_rule; freq_range = &reg_rule->freq_range; if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40)) bw_flags = IEEE80211_CHAN_NO_HT40; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80)) + bw_flags |= IEEE80211_CHAN_NO_80MHZ; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160)) + bw_flags |= IEEE80211_CHAN_NO_160MHZ; - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { /* @@ -912,10 +888,14 @@ static void handle_channel(struct wiphy *wiphy, return; } + chan->dfs_state = NL80211_DFS_USABLE; + chan->dfs_state_entered = jiffies; + chan->beacon_found = false; chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); - chan->max_antenna_gain = min(chan->orig_mag, - (int) MBI_TO_DBI(power_rule->max_antenna_gain)); + chan->max_antenna_gain = + min_t(int, chan->orig_mag, + MBI_TO_DBI(power_rule->max_antenna_gain)); chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); if (chan->orig_mpwr) { /* @@ -935,68 +915,65 @@ static void handle_channel(struct wiphy *wiphy, } static void handle_band(struct wiphy *wiphy, - enum ieee80211_band band, - enum nl80211_reg_initiator initiator) + enum nl80211_reg_initiator initiator, + struct ieee80211_supported_band *sband) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - handle_channel(wiphy, initiator, band, i); + handle_channel(wiphy, initiator, 
&sband->channels[i]); } static bool reg_request_cell_base(struct regulatory_request *request) { if (request->initiator != NL80211_REGDOM_SET_BY_USER) return false; - if (request->user_reg_hint_type != NL80211_USER_REG_HINT_CELL_BASE) - return false; - return true; + return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE; } bool reg_last_request_cell_base(void) { bool val; - assert_cfg80211_lock(); mutex_lock(&reg_mutex); - val = reg_request_cell_base(last_request); + val = reg_request_cell_base(get_last_request()); mutex_unlock(&reg_mutex); + return val; } #ifdef CONFIG_CFG80211_CERTIFICATION_ONUS - /* Core specific check */ -static int reg_ignore_cell_hint(struct regulatory_request *pending_request) +static enum reg_request_treatment +reg_ignore_cell_hint(struct regulatory_request *pending_request) { + struct regulatory_request *lr = get_last_request(); + if (!reg_num_devs_support_basehint) - return -EOPNOTSUPP; + return REG_REQ_IGNORE; - if (reg_request_cell_base(last_request)) { - if (!regdom_changes(pending_request->alpha2)) - return -EALREADY; - return 0; - } - return 0; + if (reg_request_cell_base(lr) && + !regdom_changes(pending_request->alpha2)) + return REG_REQ_ALREADY_SET; + + return REG_REQ_OK; } /* Device specific check */ static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) { - if (!(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS)) - return true; - return false; + return !(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS); } #else static int reg_ignore_cell_hint(struct regulatory_request *pending_request) { - return -EOPNOTSUPP; + return REG_REQ_IGNORE; } -static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) + +static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) { return true; } @@ -1006,18 +983,17 @@ static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) static bool ignore_reg_update(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { - if (!last_request) { - REG_DBG_PRINT("Ignoring regulatory request %s 
since " - "last_request is not set\n", + struct regulatory_request *lr = get_last_request(); + + if (!lr) { + REG_DBG_PRINT("Ignoring regulatory request %s since last_request is not set\n", reg_initiator_name(initiator)); return true; } if (initiator == NL80211_REGDOM_SET_BY_CORE && wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { - REG_DBG_PRINT("Ignoring regulatory request %s " - "since the driver uses its own custom " - "regulatory domain\n", + REG_DBG_PRINT("Ignoring regulatory request %s since the driver uses its own custom regulatory domain\n", reg_initiator_name(initiator)); return true; } @@ -1028,22 +1004,35 @@ static bool ignore_reg_update(struct wiphy *wiphy, */ if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - !is_world_regdom(last_request->alpha2)) { - REG_DBG_PRINT("Ignoring regulatory request %s " - "since the driver requires its own regulatory " - "domain to be set first\n", + !is_world_regdom(lr->alpha2)) { + REG_DBG_PRINT("Ignoring regulatory request %s since the driver requires its own regulatory domain to be set first\n", reg_initiator_name(initiator)); return true; } - if (reg_request_cell_base(last_request)) + if (reg_request_cell_base(lr)) return reg_dev_ignore_cell_hint(wiphy); return false; } -static void handle_reg_beacon(struct wiphy *wiphy, - unsigned int chan_idx, +static bool reg_is_world_roaming(struct wiphy *wiphy) +{ + const struct ieee80211_regdomain *cr = get_cfg80211_regdom(); + const struct ieee80211_regdomain *wr = get_wiphy_regdom(wiphy); + struct regulatory_request *lr = get_last_request(); + + if (is_world_regdom(cr->alpha2) || (wr && is_world_regdom(wr->alpha2))) + return true; + + if (lr && lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) + return true; + + return false; +} + +static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct reg_beacon *reg_beacon) { struct 
ieee80211_supported_band *sband; @@ -1051,8 +1040,6 @@ static void handle_reg_beacon(struct wiphy *wiphy, bool channel_changed = false; struct ieee80211_channel chan_before; - assert_cfg80211_lock(); - sband = wiphy->bands[reg_beacon->chan.band]; chan = &sband->channels[chan_idx]; @@ -1064,6 +1051,9 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; + if (!reg_is_world_roaming(wiphy)) + return; + if (wiphy->flags & WIPHY_FLAG_DISABLE_BEACON_HINTS) return; @@ -1094,8 +1084,6 @@ static void wiphy_update_new_beacon(struct wiphy *wiphy, unsigned int i; struct ieee80211_supported_band *sband; - assert_cfg80211_lock(); - if (!wiphy->bands[reg_beacon->chan.band]) return; @@ -1114,11 +1102,6 @@ static void wiphy_update_beacon_reg(struct wiphy *wiphy) struct ieee80211_supported_band *sband; struct reg_beacon *reg_beacon; - assert_cfg80211_lock(); - - if (list_empty(®_beacon_list)) - return; - list_for_each_entry(reg_beacon, ®_beacon_list, list) { if (!wiphy->bands[reg_beacon->chan.band]) continue; @@ -1128,18 +1111,6 @@ static void wiphy_update_beacon_reg(struct wiphy *wiphy) } } -static bool reg_is_world_roaming(struct wiphy *wiphy) -{ - if (is_world_regdom(cfg80211_regdomain->alpha2) || - (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) - return true; - if (last_request && - last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) - return true; - return false; -} - /* Reap the advantages of previously found beacons */ static void reg_process_beacons(struct wiphy *wiphy) { @@ -1149,39 +1120,29 @@ static void reg_process_beacons(struct wiphy *wiphy) */ if (!last_request) return; - if (!reg_is_world_roaming(wiphy)) - return; wiphy_update_beacon_reg(wiphy); } -static bool is_ht40_not_allowed(struct ieee80211_channel *chan) +static bool is_ht40_allowed(struct ieee80211_channel *chan) { if (!chan) - return true; + return false; if (chan->flags & IEEE80211_CHAN_DISABLED) - return true; + 
return false; /* This would happen when regulatory rules disallow HT40 completely */ - if (IEEE80211_CHAN_NO_HT40 == (chan->flags & (IEEE80211_CHAN_NO_HT40))) - return true; - return false; + if ((chan->flags & IEEE80211_CHAN_NO_HT40) == IEEE80211_CHAN_NO_HT40) + return false; + return true; } static void reg_process_ht_flags_channel(struct wiphy *wiphy, - enum ieee80211_band band, - unsigned int chan_idx) + struct ieee80211_channel *channel) { - struct ieee80211_supported_band *sband; - struct ieee80211_channel *channel; + struct ieee80211_supported_band *sband = wiphy->bands[channel->band]; struct ieee80211_channel *channel_before = NULL, *channel_after = NULL; unsigned int i; - assert_cfg80211_lock(); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - channel = &sband->channels[chan_idx]; - - if (is_ht40_not_allowed(channel)) { + if (!is_ht40_allowed(channel)) { channel->flags |= IEEE80211_CHAN_NO_HT40; return; } @@ -1192,6 +1153,7 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, */ for (i = 0; i < sband->n_channels; i++) { struct ieee80211_channel *c = &sband->channels[i]; + if (c->center_freq == (channel->center_freq - 20)) channel_before = c; if (c->center_freq == (channel->center_freq + 20)) @@ -1203,28 +1165,27 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, * if that ever changes we also need to change the below logic * to include that as well. 
*/ - if (is_ht40_not_allowed(channel_before)) + if (!is_ht40_allowed(channel_before)) channel->flags |= IEEE80211_CHAN_NO_HT40MINUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS; - if (is_ht40_not_allowed(channel_after)) + if (!is_ht40_allowed(channel_after)) channel->flags |= IEEE80211_CHAN_NO_HT40PLUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS; } static void reg_process_ht_flags_band(struct wiphy *wiphy, - enum ieee80211_band band) + struct ieee80211_supported_band *sband) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - reg_process_ht_flags_channel(wiphy, band, i); + reg_process_ht_flags_channel(wiphy, &sband->channels[i]); } static void reg_process_ht_flags(struct wiphy *wiphy) @@ -1234,34 +1195,29 @@ static void reg_process_ht_flags(struct wiphy *wiphy) if (!wiphy) return; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (wiphy->bands[band]) - reg_process_ht_flags_band(wiphy, band); - } - + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } static void wiphy_update_regulatory(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { enum ieee80211_band band; - - assert_reg_lock(); + struct regulatory_request *lr = get_last_request(); if (ignore_reg_update(wiphy, initiator)) return; - last_request->dfs_region = cfg80211_regdomain->dfs_region; + lr->dfs_region = get_cfg80211_regdom()->dfs_region; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (wiphy->bands[band]) - handle_band(wiphy, band, initiator); - } + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + handle_band(wiphy, initiator, wiphy->bands[band]); reg_process_beacons(wiphy); reg_process_ht_flags(wiphy); + if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, last_request); + wiphy->reg_notifier(wiphy, lr); } static void update_all_wiphy_regulatory(enum 
nl80211_reg_initiator initiator) @@ -1269,6 +1225,8 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; + assert_cfg80211_lock(); + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; wiphy_update_regulatory(wiphy, initiator); @@ -1280,53 +1238,40 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) if (initiator == NL80211_REGDOM_SET_BY_CORE && wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, last_request); + wiphy->reg_notifier(wiphy, get_last_request()); } } static void handle_channel_custom(struct wiphy *wiphy, - enum ieee80211_band band, - unsigned int chan_idx, + struct ieee80211_channel *chan, const struct ieee80211_regdomain *regd) { - int r; - u32 desired_bw_khz = MHZ_TO_KHZ(20); u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; const struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; - assert_reg_lock(); + reg_rule = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq), + regd); - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - chan = &sband->channels[chan_idx]; - - r = freq_reg_info_regd(wiphy, - MHZ_TO_KHZ(chan->center_freq), - desired_bw_khz, - ®_rule, - regd); - - if (r) { - REG_DBG_PRINT("Disabling freq %d MHz as custom " - "regd has no rule that fits a %d MHz " - "wide channel\n", - chan->center_freq, - KHZ_TO_MHZ(desired_bw_khz)); + if (IS_ERR(reg_rule)) { + REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", + chan->center_freq); chan->flags = IEEE80211_CHAN_DISABLED; return; } - chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + chan_reg_rule_print_dbg(chan, reg_rule); power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; if 
(freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40)) bw_flags = IEEE80211_CHAN_NO_HT40; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80)) + bw_flags |= IEEE80211_CHAN_NO_80MHZ; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160)) + bw_flags |= IEEE80211_CHAN_NO_160MHZ; chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain); @@ -1334,17 +1279,17 @@ static void handle_channel_custom(struct wiphy *wiphy, (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band_custom(struct wiphy *wiphy, enum ieee80211_band band, +static void handle_band_custom(struct wiphy *wiphy, + struct ieee80211_supported_band *sband, const struct ieee80211_regdomain *regd) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - handle_channel_custom(wiphy, band, i, regd); + handle_channel_custom(wiphy, &sband->channels[i], regd); } /* Used by drivers prior to wiphy registration */ @@ -1354,60 +1299,50 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, enum ieee80211_band band; unsigned int bands_set = 0; - mutex_lock(®_mutex); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!wiphy->bands[band]) continue; - handle_band_custom(wiphy, band, regd); + handle_band_custom(wiphy, wiphy->bands[band], regd); bands_set++; } - mutex_unlock(®_mutex); /* * no point in calling this if it won't have any effect - * on your device's supportd bands. + * on your device's supported bands. */ WARN_ON(!bands_set); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); -/* - * Return value which can be used by ignore_request() to indicate - * it has been determined we should intersect two regulatory domains - */ -#define REG_INTERSECT 1 - /* This has the logic which determines when a new request * should be ignored. 
*/ -static int ignore_request(struct wiphy *wiphy, +static enum reg_request_treatment +get_reg_request_treatment(struct wiphy *wiphy, struct regulatory_request *pending_request) { struct wiphy *last_wiphy = NULL; - - assert_cfg80211_lock(); + struct regulatory_request *lr = get_last_request(); /* All initial requests are respected */ - if (!last_request) - return 0; + if (!lr) + return REG_REQ_OK; switch (pending_request->initiator) { case NL80211_REGDOM_SET_BY_CORE: - return 0; + return REG_REQ_OK; case NL80211_REGDOM_SET_BY_COUNTRY_IE: - - if (reg_request_cell_base(last_request)) { + if (reg_request_cell_base(lr)) { /* Trust a Cell base station over the AP's country IE */ if (regdom_changes(pending_request->alpha2)) - return -EOPNOTSUPP; - return -EALREADY; + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } - last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + last_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (unlikely(!is_an_alpha2(pending_request->alpha2))) return -EINVAL; - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { if (last_wiphy != wiphy) { /* * Two cards with two APs claiming different @@ -1416,23 +1351,23 @@ static int ignore_request(struct wiphy *wiphy, * to be correct. Reject second one for now. */ if (regdom_changes(pending_request->alpha2)) - return -EOPNOTSUPP; - return -EALREADY; + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } /* * Two consecutive Country IE hints on the same wiphy. 
* This should be picked up early by the driver/stack */ if (WARN_ON(regdom_changes(pending_request->alpha2))) - return 0; - return -EALREADY; + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; } return 0; case NL80211_REGDOM_SET_BY_DRIVER: - if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { if (regdom_changes(pending_request->alpha2)) - return 0; - return -EALREADY; + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; } /* @@ -1440,59 +1375,59 @@ static int ignore_request(struct wiphy *wiphy, * back in or if you add a new device for which the previously * loaded card also agrees on the regulatory domain. */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && !regdom_changes(pending_request->alpha2)) - return -EALREADY; + return REG_REQ_ALREADY_SET; - return REG_INTERSECT; + return REG_REQ_INTERSECT; case NL80211_REGDOM_SET_BY_USER: if (reg_request_cell_base(pending_request)) return reg_ignore_cell_hint(pending_request); - if (reg_request_cell_base(last_request)) - return -EOPNOTSUPP; + if (reg_request_cell_base(lr)) + return REG_REQ_IGNORE; - if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) - return REG_INTERSECT; + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) + return REG_REQ_INTERSECT; /* * If the user knows better the user should set the regdom * to their country before the IE is picked up */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER && - last_request->intersect) - return -EOPNOTSUPP; + if (lr->initiator == NL80211_REGDOM_SET_BY_USER && + lr->intersect) + return REG_REQ_IGNORE; /* * Process user requests only after previous user/driver/core * requests have been processed */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE || - last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || - last_request->initiator == NL80211_REGDOM_SET_BY_USER) { - if 
(regdom_changes(last_request->alpha2)) - return -EAGAIN; - } + if ((lr->initiator == NL80211_REGDOM_SET_BY_CORE || + lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == NL80211_REGDOM_SET_BY_USER) && + regdom_changes(lr->alpha2)) + return REG_REQ_IGNORE; if (!regdom_changes(pending_request->alpha2)) - return -EALREADY; + return REG_REQ_ALREADY_SET; - return 0; + return REG_REQ_OK; } - return -EINVAL; + return REG_REQ_IGNORE; } static void reg_set_request_processed(void) { bool need_more_processing = false; + struct regulatory_request *lr = get_last_request(); - last_request->processed = true; + lr->processed = true; spin_lock(®_requests_lock); if (!list_empty(®_requests_list)) need_more_processing = true; spin_unlock(®_requests_lock); - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) cancel_delayed_work(®_timeout); if (need_more_processing) @@ -1508,116 +1443,122 @@ static void reg_set_request_processed(void) * The Wireless subsystem can use this function to hint to the wireless core * what it believes should be the current regulatory domain. * - * Returns zero if all went fine, %-EALREADY if a regulatory domain had - * already been set or other standard error codes. + * Returns one of the different reg request treatment values. 
* - * Caller must hold &cfg80211_mutex and ®_mutex + * Caller must hold ®_mutex */ -static int __regulatory_hint(struct wiphy *wiphy, - struct regulatory_request *pending_request) +static enum reg_request_treatment +__regulatory_hint(struct wiphy *wiphy, + struct regulatory_request *pending_request) { + const struct ieee80211_regdomain *regd; bool intersect = false; - int r = 0; - - assert_cfg80211_lock(); + enum reg_request_treatment treatment; + struct regulatory_request *lr; - r = ignore_request(wiphy, pending_request); + treatment = get_reg_request_treatment(wiphy, pending_request); - if (r == REG_INTERSECT) { + switch (treatment) { + case REG_REQ_INTERSECT: if (pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { - r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) { + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { kfree(pending_request); - return r; + return PTR_ERR(regd); } + rcu_assign_pointer(wiphy->regd, regd); } intersect = true; - } else if (r) { + break; + case REG_REQ_OK: + break; + default: /* * If the regulatory domain being requested by the * driver has already been set just copy it to the * wiphy */ - if (r == -EALREADY && - pending_request->initiator == - NL80211_REGDOM_SET_BY_DRIVER) { - r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) { + if (treatment == REG_REQ_ALREADY_SET && + pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { kfree(pending_request); - return r; + return REG_REQ_IGNORE; } - r = -EALREADY; + treatment = REG_REQ_ALREADY_SET; + rcu_assign_pointer(wiphy->regd, regd); goto new_request; } kfree(pending_request); - return r; + return treatment; } new_request: - if (last_request != &core_request_world) - kfree(last_request); + lr = get_last_request(); + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); - last_request = pending_request; - last_request->intersect = intersect; + 
pending_request->intersect = intersect; + pending_request->processed = false; + rcu_assign_pointer(last_request, pending_request); + lr = pending_request; pending_request = NULL; - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) { - user_alpha2[0] = last_request->alpha2[0]; - user_alpha2[1] = last_request->alpha2[1]; + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) { + user_alpha2[0] = lr->alpha2[0]; + user_alpha2[1] = lr->alpha2[1]; } - /* When r == REG_INTERSECT we do need to call CRDA */ - if (r < 0) { + /* When r == REG_REQ_INTERSECT we do need to call CRDA */ + if (treatment != REG_REQ_OK && treatment != REG_REQ_INTERSECT) { /* * Since CRDA will not be called in this case as we already * have applied the requested regulatory domain before we just * inform userspace we have processed the request */ - if (r == -EALREADY) { - nl80211_send_reg_change_event(last_request); + if (treatment == REG_REQ_ALREADY_SET) { + nl80211_send_reg_change_event(lr); reg_set_request_processed(); } - return r; + return treatment; } - return call_crda(last_request->alpha2); + if (call_crda(lr->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_OK; } /* This processes *all* regulatory hints */ static void reg_process_hint(struct regulatory_request *reg_request, enum nl80211_reg_initiator reg_initiator) { - int r = 0; struct wiphy *wiphy = NULL; - BUG_ON(!reg_request->alpha2); + if (WARN_ON(!reg_request->alpha2)) + return; - if (wiphy_idx_valid(reg_request->wiphy_idx)) + if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && - !wiphy) { + if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); return; } - r = __regulatory_hint(wiphy, reg_request); - /* This is required so that the orig_* parameters are saved */ - if (r == -EALREADY && wiphy && - wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { - wiphy_update_regulatory(wiphy, reg_initiator); - 
return; + switch (__regulatory_hint(wiphy, reg_request)) { + case REG_REQ_ALREADY_SET: + /* This is required so that the orig_* parameters are saved */ + if (wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) + wiphy_update_regulatory(wiphy, reg_initiator); + break; + default: + if (reg_initiator == NL80211_REGDOM_SET_BY_USER) + schedule_delayed_work(®_timeout, + msecs_to_jiffies(3142)); + break; } - - /* - * We only time out user hints, given that they should be the only - * source of bogus requests. - */ - if (r != -EALREADY && - reg_initiator == NL80211_REGDOM_SET_BY_USER) - schedule_delayed_work(®_timeout, msecs_to_jiffies(3142)); } /* @@ -1627,15 +1568,15 @@ static void reg_process_hint(struct regulatory_request *reg_request, */ static void reg_process_pending_hints(void) { - struct regulatory_request *reg_request; + struct regulatory_request *reg_request, *lr; mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); + lr = get_last_request(); /* When last_request->processed becomes true this will be rescheduled */ - if (last_request && !last_request->processed) { - REG_DBG_PRINT("Pending regulatory request, waiting " - "for it to be processed...\n"); + if (lr && !lr->processed) { + REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); goto out; } @@ -1666,23 +1607,14 @@ static void reg_process_pending_beacon_hints(void) struct cfg80211_registered_device *rdev; struct reg_beacon *pending_beacon, *tmp; - /* - * No need to hold the reg_mutex here as we just touch wiphys - * and do not read or access regulatory variables. 
- */ mutex_lock(&cfg80211_mutex); + mutex_lock(®_mutex); /* This goes through the _pending_ beacon list */ spin_lock_bh(®_pending_beacons_lock); - if (list_empty(®_pending_beacons)) { - spin_unlock_bh(®_pending_beacons_lock); - goto out; - } - list_for_each_entry_safe(pending_beacon, tmp, ®_pending_beacons, list) { - list_del_init(&pending_beacon->list); /* Applies the beacon hint to current wiphys */ @@ -1694,7 +1626,7 @@ static void reg_process_pending_beacon_hints(void) } spin_unlock_bh(®_pending_beacons_lock); -out: + mutex_unlock(®_mutex); mutex_unlock(&cfg80211_mutex); } @@ -1706,10 +1638,8 @@ static void reg_todo(struct work_struct *work) static void queue_regulatory_request(struct regulatory_request *request) { - if (isalpha(request->alpha2[0])) - request->alpha2[0] = toupper(request->alpha2[0]); - if (isalpha(request->alpha2[1])) - request->alpha2[1] = toupper(request->alpha2[1]); + request->alpha2[0] = toupper(request->alpha2[0]); + request->alpha2[1] = toupper(request->alpha2[1]); spin_lock(®_requests_lock); list_add_tail(&request->list, ®_requests_list); @@ -1726,8 +1656,7 @@ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - request = kzalloc(sizeof(struct regulatory_request), - GFP_KERNEL); + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) return -ENOMEM; @@ -1746,13 +1675,14 @@ int regulatory_hint_user(const char *alpha2, { struct regulatory_request *request; - BUG_ON(!alpha2); + if (WARN_ON(!alpha2)) + return -EINVAL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) return -ENOMEM; - request->wiphy_idx = WIPHY_IDX_STALE; + request->wiphy_idx = WIPHY_IDX_INVALID; request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_USER; @@ -1768,8 +1698,8 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { struct regulatory_request *request; - BUG_ON(!alpha2); - BUG_ON(!wiphy); + if 
(WARN_ON(!alpha2 || !wiphy)) + return -EINVAL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) @@ -1777,9 +1707,6 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) request->wiphy_idx = get_wiphy_idx(wiphy); - /* Must have registered wiphy first */ - BUG_ON(!wiphy_idx_valid(request->wiphy_idx)); - request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_DRIVER; @@ -1794,18 +1721,17 @@ EXPORT_SYMBOL(regulatory_hint); * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and * therefore cannot iterate over the rdev list here. */ -void regulatory_hint_11d(struct wiphy *wiphy, - enum ieee80211_band band, - const u8 *country_ie, - u8 country_ie_len) +void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, + const u8 *country_ie, u8 country_ie_len) { char alpha2[2]; enum environment_cap env = ENVIRON_ANY; - struct regulatory_request *request; + struct regulatory_request *request, *lr; mutex_lock(®_mutex); + lr = get_last_request(); - if (unlikely(!last_request)) + if (unlikely(!lr)) goto out; /* IE len must be evenly divisible by 2 */ @@ -1828,9 +1754,8 @@ void regulatory_hint_11d(struct wiphy *wiphy, * We leave conflict resolution to the workqueue, where can hold * cfg80211_mutex. 
*/ - if (likely(last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy_idx_valid(last_request->wiphy_idx))) + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + lr->wiphy_idx != WIPHY_IDX_INVALID) goto out; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); @@ -1843,12 +1768,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; request->country_ie_env = env; - mutex_unlock(®_mutex); - queue_regulatory_request(request); - - return; - out: mutex_unlock(®_mutex); } @@ -1863,8 +1783,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) if (is_user_regdom_saved()) { /* Unless we're asked to ignore it and reset it */ if (reset_user) { - REG_DBG_PRINT("Restoring regulatory settings " - "including user preference\n"); + REG_DBG_PRINT("Restoring regulatory settings including user preference\n"); user_alpha2[0] = '9'; user_alpha2[1] = '7'; @@ -1874,26 +1793,20 @@ static void restore_alpha2(char *alpha2, bool reset_user) * back as they were for a full restore. 
*/ if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("Keeping preference on " - "module parameter ieee80211_regdom: %c%c\n", - ieee80211_regdom[0], - ieee80211_regdom[1]); + REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } } else { - REG_DBG_PRINT("Restoring regulatory settings " - "while preserving user preference for: %c%c\n", - user_alpha2[0], - user_alpha2[1]); + REG_DBG_PRINT("Restoring regulatory settings while preserving user preference for: %c%c\n", + user_alpha2[0], user_alpha2[1]); alpha2[0] = user_alpha2[0]; alpha2[1] = user_alpha2[1]; } } else if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("Keeping preference on " - "module parameter ieee80211_regdom: %c%c\n", - ieee80211_regdom[0], - ieee80211_regdom[1]); + REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } else @@ -1948,7 +1861,7 @@ static void restore_regulatory_settings(bool reset_user) mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); - reset_regdomains(true); + reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); /* @@ -1958,49 +1871,35 @@ static void restore_regulatory_settings(bool reset_user) * settings. 
*/ spin_lock(®_requests_lock); - if (!list_empty(®_requests_list)) { - list_for_each_entry_safe(reg_request, tmp, - ®_requests_list, list) { - if (reg_request->initiator != - NL80211_REGDOM_SET_BY_USER) - continue; - list_move_tail(®_request->list, &tmp_reg_req_list); - } + list_for_each_entry_safe(reg_request, tmp, ®_requests_list, list) { + if (reg_request->initiator != NL80211_REGDOM_SET_BY_USER) + continue; + list_move_tail(®_request->list, &tmp_reg_req_list); } spin_unlock(®_requests_lock); /* Clear beacon hints */ spin_lock_bh(®_pending_beacons_lock); - if (!list_empty(®_pending_beacons)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_pending_beacons, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } spin_unlock_bh(®_pending_beacons_lock); - if (!list_empty(®_beacon_list)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_beacon_list, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } /* First restore to the basic regulatory settings */ - cfg80211_regdomain = cfg80211_world_regdom; - world_alpha2[0] = cfg80211_regdomain->alpha2[0]; - world_alpha2[1] = cfg80211_regdomain->alpha2[1]; + world_alpha2[0] = cfg80211_world_regdom->alpha2[0]; + world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (rdev->wiphy.flags & WIPHY_FLAG_CUSTOM_REGULATORY) restore_custom_reg_settings(&rdev->wiphy); } - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); - regulatory_hint_core(world_alpha2); /* @@ -2011,20 +1910,8 @@ static void restore_regulatory_settings(bool reset_user) if (is_an_alpha2(alpha2)) regulatory_hint_user(user_alpha2, NL80211_USER_REG_HINT_USER); - if (list_empty(&tmp_reg_req_list)) - return; - - mutex_lock(&cfg80211_mutex); - 
mutex_lock(®_mutex); - spin_lock(®_requests_lock); - list_for_each_entry_safe(reg_request, tmp, &tmp_reg_req_list, list) { - REG_DBG_PRINT("Adding request for country %c%c back " - "into the queue\n", - reg_request->alpha2[0], - reg_request->alpha2[1]); - list_move_tail(®_request->list, ®_requests_list); - } + list_splice_tail_init(&tmp_reg_req_list, ®_requests_list); spin_unlock(®_requests_lock); mutex_unlock(®_mutex); @@ -2037,8 +1924,7 @@ static void restore_regulatory_settings(bool reset_user) void regulatory_hint_disconnect(void) { - REG_DBG_PRINT("All devices are disconnected, going to " - "restore regulatory settings\n"); + REG_DBG_PRINT("All devices are disconnected, going to restore regulatory settings\n"); restore_regulatory_settings(false); } @@ -2051,31 +1937,48 @@ static bool freq_is_chan_12_13_14(u16 freq) return false; } +static bool pending_reg_beacon(struct ieee80211_channel *beacon_chan) +{ + struct reg_beacon *pending_beacon; + + list_for_each_entry(pending_beacon, ®_pending_beacons, list) + if (beacon_chan->center_freq == + pending_beacon->chan.center_freq) + return true; + return false; +} + int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, gfp_t gfp) { struct reg_beacon *reg_beacon; + bool processing; - if (likely((beacon_chan->beacon_found || - (beacon_chan->flags & IEEE80211_CHAN_RADAR) || + if (beacon_chan->beacon_found || + beacon_chan->flags & IEEE80211_CHAN_RADAR || (beacon_chan->band == IEEE80211_BAND_2GHZ && - !freq_is_chan_12_13_14(beacon_chan->center_freq))))) + !freq_is_chan_12_13_14(beacon_chan->center_freq))) + return 0; + + spin_lock_bh(®_pending_beacons_lock); + processing = pending_reg_beacon(beacon_chan); + spin_unlock_bh(®_pending_beacons_lock); + + if (processing) return 0; reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp); if (!reg_beacon) return -ENOMEM; - REG_DBG_PRINT("Found new beacon on " - "frequency: %d MHz (Ch %d) on %s\n", + REG_DBG_PRINT("Found new beacon on frequency: 
%d MHz (Ch %d) on %s\n", beacon_chan->center_freq, ieee80211_frequency_to_channel(beacon_chan->center_freq), wiphy_name(wiphy)); memcpy(®_beacon->chan, beacon_chan, - sizeof(struct ieee80211_channel)); - + sizeof(struct ieee80211_channel)); /* * Since we can be called from BH or and non-BH context @@ -2155,21 +2058,19 @@ static void print_dfs_region(u8 dfs_region) pr_info(" DFS Master region JP"); break; default: - pr_info(" DFS Master region Uknown"); + pr_info(" DFS Master region Unknown"); break; } } static void print_regdomain(const struct ieee80211_regdomain *rd) { + struct regulatory_request *lr = get_last_request(); if (is_intersected_alpha2(rd->alpha2)) { - - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { struct cfg80211_registered_device *rdev; - rdev = cfg80211_rdev_by_wiphy_idx( - last_request->wiphy_idx); + rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx); if (rdev) { pr_info("Current regulatory domain updated by AP to: %c%c\n", rdev->country_ie_alpha2[0], @@ -2178,22 +2079,21 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) pr_info("Current regulatory domain intersected:\n"); } else pr_info("Current regulatory domain intersected:\n"); - } else if (is_world_regdom(rd->alpha2)) + } else if (is_world_regdom(rd->alpha2)) { pr_info("World regulatory domain updated:\n"); - else { + } else { if (is_unknown_alpha2(rd->alpha2)) pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n"); else { - if (reg_request_cell_base(last_request)) - pr_info("Regulatory domain changed " - "to country: %c%c by Cell Station\n", + if (reg_request_cell_base(lr)) + pr_info("Regulatory domain changed to country: %c%c by Cell Station\n", rd->alpha2[0], rd->alpha2[1]); else - pr_info("Regulatory domain changed " - "to country: %c%c\n", + pr_info("Regulatory domain changed to country: %c%c\n", rd->alpha2[0], rd->alpha2[1]); } } + 
print_dfs_region(rd->dfs_region); print_rd_rules(rd); } @@ -2207,22 +2107,23 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) /* Takes ownership of rd only if it doesn't fail */ static int __set_regdom(const struct ieee80211_regdomain *rd) { + const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *intersected_rd = NULL; struct wiphy *request_wiphy; + struct regulatory_request *lr = get_last_request(); + /* Some basic sanity checks first */ + if (!reg_is_valid_request(rd->alpha2)) + return -EINVAL; + if (is_world_regdom(rd->alpha2)) { - if (WARN_ON(!reg_is_valid_request(rd->alpha2))) - return -EINVAL; update_world_regdomain(rd); return 0; } if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && - !is_unknown_alpha2(rd->alpha2)) - return -EINVAL; - - if (!last_request) + !is_unknown_alpha2(rd->alpha2)) return -EINVAL; /* @@ -2230,7 +2131,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * rd is non static (it means CRDA was present and was used last) * and the pending request came in from a country IE */ - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { /* * If someone else asked us to change the rd lets only bother * checking if the alpha2 changes if CRDA was already called @@ -2246,29 +2147,23 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * internal EEPROM data */ - if (WARN_ON(!reg_is_valid_request(rd->alpha2))) - return -EINVAL; - if (!is_valid_rd(rd)) { pr_err("Invalid regulatory domain detected:\n"); print_regdomain_info(rd); return -EINVAL; } - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy && - (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || - last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { + (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == 
NL80211_REGDOM_SET_BY_COUNTRY_IE)) { schedule_delayed_work(&reg_timeout, 0); return -ENODEV; } - if (!last_request->intersect) { - int r; - - if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) { - reset_regdomains(false); - cfg80211_regdomain = rd; + if (!lr->intersect) { + if (lr->initiator != NL80211_REGDOM_SET_BY_DRIVER) { + reset_regdomains(false, rd); return 0; } @@ -2284,20 +2179,19 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (request_wiphy->regd) return -EALREADY; - r = reg_copy_regd(&request_wiphy->regd, rd); - if (r) - return r; + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); - reset_regdomains(false); - cfg80211_regdomain = rd; + rcu_assign_pointer(request_wiphy->regd, regd); + reset_regdomains(false, rd); return 0; } /* Intersection requires a bit more work */ - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { - - intersected_rd = regdom_intersect(rd, cfg80211_regdomain); + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { + intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); if (!intersected_rd) return -EINVAL; @@ -2306,15 +2200,19 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * However if a driver requested this specific regulatory * domain we keep it for its private use */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) - request_wiphy->regd = rd; - else + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER) { + const struct ieee80211_regdomain *tmp; + + tmp = get_wiphy_regdom(request_wiphy); + rcu_assign_pointer(request_wiphy->regd, rd); + rcu_free_regdom(tmp); + } else { kfree(rd); + } rd = NULL; - reset_regdomains(false); - cfg80211_regdomain = intersected_rd; + reset_regdomains(false, intersected_rd); return 0; } @@ -2326,15 +2224,15 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* * Use this call to set the current regulatory domain. Conflicts with * multiple drivers can be ironed out later.
Caller must've already - * kmalloc'd the rd structure. Caller must hold cfg80211_mutex + * kmalloc'd the rd structure. */ int set_regdom(const struct ieee80211_regdomain *rd) { + struct regulatory_request *lr; int r; - assert_cfg80211_lock(); - mutex_lock(&reg_mutex); + lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ r = __set_regdom(rd); @@ -2343,51 +2241,52 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); kfree(rd); - mutex_unlock(&reg_mutex); - return r; + goto out; } /* This would make this whole thing pointless */ - if (!last_request->intersect) - BUG_ON(rd != cfg80211_regdomain); + if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) { + r = -EINVAL; + goto out; + } /* update all wiphys now with the new established regulatory domain */ - update_all_wiphy_regulatory(last_request->initiator); + update_all_wiphy_regulatory(lr->initiator); - print_regdomain(cfg80211_regdomain); + print_regdomain(get_cfg80211_regdom()); - nl80211_send_reg_change_event(last_request); + nl80211_send_reg_change_event(lr); reg_set_request_processed(); + out: mutex_unlock(&reg_mutex); return r; } -#ifdef CONFIG_HOTPLUG int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) { - if (last_request && !last_request->processed) { - if (add_uevent_var(env, "COUNTRY=%c%c", - last_request->alpha2[0], - last_request->alpha2[1])) - return -ENOMEM; + struct regulatory_request *lr; + u8 alpha2[2]; + bool add = false; + + rcu_read_lock(); + lr = get_last_request(); + if (lr && !lr->processed) { + memcpy(alpha2, lr->alpha2, 2); + add = true; } + rcu_read_unlock(); + if (add) + return add_uevent_var(env, "COUNTRY=%c%c", + alpha2[0], alpha2[1]); return 0; } -#else -int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - return -ENODEV; -} -#endif /* CONFIG_HOTPLUG */ void wiphy_regulatory_register(struct wiphy *wiphy) { - assert_cfg80211_lock(); - mutex_lock(&reg_mutex); if
(!reg_dev_ignore_cell_hint(wiphy)) @@ -2402,32 +2301,32 @@ void wiphy_regulatory_register(struct wiphy *wiphy) void wiphy_regulatory_deregister(struct wiphy *wiphy) { struct wiphy *request_wiphy = NULL; - - assert_cfg80211_lock(); + struct regulatory_request *lr; mutex_lock(&reg_mutex); + lr = get_last_request(); if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint--; - kfree(wiphy->regd); + rcu_free_regdom(get_wiphy_regdom(wiphy)); + rcu_assign_pointer(wiphy->regd, NULL); - if (last_request) - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (lr) + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy || request_wiphy != wiphy) goto out; - last_request->wiphy_idx = WIPHY_IDX_STALE; - last_request->country_ie_env = ENVIRON_ANY; + lr->wiphy_idx = WIPHY_IDX_INVALID; + lr->country_ie_env = ENVIRON_ANY; out: mutex_unlock(&reg_mutex); } static void reg_timeout_work(struct work_struct *work) { - REG_DBG_PRINT("Timeout while waiting for CRDA to reply, " - "restoring regulatory settings\n"); + REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); restore_regulatory_settings(true); } @@ -2446,13 +2345,13 @@ int __init regulatory_init(void) reg_regdb_size_check(); - cfg80211_regdomain = cfg80211_world_regdom; + rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); user_alpha2[0] = '9'; user_alpha2[1] = '7'; /* We always try to get an update for the static regdomain */ - err = regulatory_hint_core(cfg80211_regdomain->alpha2); + err = regulatory_hint_core(cfg80211_world_regdom->alpha2); if (err) { if (err == -ENOMEM) return err; @@ -2464,10 +2363,6 @@ int __init regulatory_init(void) * errors as non-fatal.
*/ pr_err("kobject_uevent_env() was unable to call CRDA during init\n"); -#ifdef CONFIG_CFG80211_REG_DEBUG - /* We want to find out exactly why when debugging */ - WARN_ON(err); -#endif } /* @@ -2481,7 +2376,7 @@ int __init regulatory_init(void) return 0; } -void /* __init_or_exit */ regulatory_exit(void) +void regulatory_exit(void) { struct regulatory_request *reg_request, *tmp; struct reg_beacon *reg_beacon, *btmp; @@ -2489,43 +2384,27 @@ void /* __init_or_exit */ regulatory_exit(void) cancel_work_sync(&reg_work); cancel_delayed_work_sync(&reg_timeout); - mutex_lock(&cfg80211_mutex); + /* Lock to suppress warnings */ mutex_lock(&reg_mutex); - - reset_regdomains(true); + reset_regdomains(true, NULL); + mutex_unlock(&reg_mutex); dev_set_uevent_suppress(&reg_pdev->dev, true); platform_device_unregister(reg_pdev); - spin_lock_bh(&reg_pending_beacons_lock); - if (!list_empty(&reg_pending_beacons)) { - list_for_each_entry_safe(reg_beacon, btmp, - &reg_pending_beacons, list) { - list_del(&reg_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, &reg_pending_beacons, list) { + list_del(&reg_beacon->list); + kfree(reg_beacon); } - spin_unlock_bh(&reg_pending_beacons_lock); - if (!list_empty(&reg_beacon_list)) { - list_for_each_entry_safe(reg_beacon, btmp, - &reg_beacon_list, list) { - list_del(&reg_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, &reg_beacon_list, list) { + list_del(&reg_beacon->list); + kfree(reg_beacon); } - spin_lock(&reg_requests_lock); - if (!list_empty(&reg_requests_list)) { - list_for_each_entry_safe(reg_request, tmp, - &reg_requests_list, list) { - list_del(&reg_request->list); - kfree(reg_request); - } + list_for_each_entry_safe(reg_request, tmp, &reg_requests_list, list) { + list_del(&reg_request->list); + kfree(reg_request); } - spin_unlock(&reg_requests_lock); - - mutex_unlock(&reg_mutex); - mutex_unlock(&cfg80211_mutex); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 4c0a32f..af2d5f8 100644 --- a/net/wireless/reg.h +++
b/net/wireless/reg.h @@ -16,10 +16,9 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -extern const struct ieee80211_regdomain *cfg80211_regdomain; +extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain; bool is_world_regdom(const char *alpha2); -bool reg_is_valid_request(const char *alpha2); bool reg_supported_dfs_region(u8 dfs_region); int regulatory_hint_user(const char *alpha2, @@ -55,8 +54,8 @@ bool reg_last_request_cell_base(void); * set the wiphy->disable_beacon_hints to true. */ int regulatory_hint_found_beacon(struct wiphy *wiphy, - struct ieee80211_channel *beacon_chan, - gfp_t gfp); + struct ieee80211_channel *beacon_chan, + gfp_t gfp); /** * regulatory_hint_11d - hints a country IE as a regulatory domain diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 01592d7..fd99ea4 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -19,55 +19,142 @@ #include "wext-compat.h" #include "rdev-ops.h" +/** + * DOC: BSS tree/list structure + * + * At the top level, the BSS list is kept in both a list in each + * registered device (@bss_list) as well as an RB-tree for faster + * lookup. In the RB-tree, entries can be looked up using their + * channel, MESHID, MESHCONF (for MBSSes) or channel, BSSID, SSID + * for other BSSes. + * + * Due to the possibility of hidden SSIDs, there's a second level + * structure, the "hidden_list" and "hidden_beacon_bss" pointer. + * The hidden_list connects all BSSes belonging to a single AP + * that has a hidden SSID, and connects beacon and probe response + * entries. For a probe response entry for a hidden SSID, the + * hidden_beacon_bss pointer points to the BSS struct holding the + * beacon's information. 
+ * + * Reference counting is done for all these references except for + * the hidden_list, so that a beacon BSS struct that is otherwise + * not referenced has one reference for being on the bss_list and + * one for each probe response entry that points to it using the + * hidden_beacon_bss pointer. When a BSS struct that has such a + * pointer is get/put, the refcount update is also propagated to + * the referenced struct, this ensure that it cannot get removed + * while somebody is using the probe response version. + * + * Note that the hidden_beacon_bss pointer never changes, due to + * the reference counting. Therefore, no locking is needed for + * it. + * + * Also note that the hidden_beacon_bss pointer is only relevant + * if the driver uses something other than the IEs, e.g. private + * data stored stored in the BSS struct, since the beacon IEs are + * also linked into the probe response struct. + */ + #define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ) -static void bss_release(struct kref *ref) +static void bss_free(struct cfg80211_internal_bss *bss) { struct cfg80211_bss_ies *ies; - struct cfg80211_internal_bss *bss; - - bss = container_of(ref, struct cfg80211_internal_bss, ref); if (WARN_ON(atomic_read(&bss->hold))) return; - if (bss->pub.free_priv) - bss->pub.free_priv(&bss->pub); - ies = (void *)rcu_access_pointer(bss->pub.beacon_ies); - if (ies) + if (ies && !bss->pub.hidden_beacon_bss) kfree_rcu(ies, rcu_head); ies = (void *)rcu_access_pointer(bss->pub.proberesp_ies); if (ies) kfree_rcu(ies, rcu_head); + /* + * This happens when the module is removed, it doesn't + * really matter any more save for completeness + */ + if (!list_empty(&bss->hidden_list)) + list_del(&bss->hidden_list); + kfree(bss); } -/* must hold dev->bss_lock! 
*/ -static void __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, +static inline void bss_ref_get(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *bss) +{ + lockdep_assert_held(&dev->bss_lock); + + bss->refcount++; + if (bss->pub.hidden_beacon_bss) { + bss = container_of(bss->pub.hidden_beacon_bss, + struct cfg80211_internal_bss, + pub); + bss->refcount++; + } +} + +static inline void bss_ref_put(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *bss) +{ + lockdep_assert_held(&dev->bss_lock); + + if (bss->pub.hidden_beacon_bss) { + struct cfg80211_internal_bss *hbss; + hbss = container_of(bss->pub.hidden_beacon_bss, + struct cfg80211_internal_bss, + pub); + hbss->refcount--; + if (hbss->refcount == 0) + bss_free(hbss); + } + bss->refcount--; + if (bss->refcount == 0) + bss_free(bss); +} + +static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *bss) { + lockdep_assert_held(&dev->bss_lock); + + if (!list_empty(&bss->hidden_list)) { + /* + * don't remove the beacon entry if it has + * probe responses associated with it + */ + if (!bss->pub.hidden_beacon_bss) + return false; + /* + * if it's a probe response entry break its + * link to the other entries in the group + */ + list_del_init(&bss->hidden_list); + } + list_del_init(&bss->list); rb_erase(&bss->rbn, &dev->bss_tree); - kref_put(&bss->ref, bss_release); + bss_ref_put(dev, bss); + return true; } -/* must hold dev->bss_lock! 
*/ static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, unsigned long expire_time) { struct cfg80211_internal_bss *bss, *tmp; bool expired = false; + lockdep_assert_held(&dev->bss_lock); + list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { if (atomic_read(&bss->hold)) continue; if (!time_after(expire_time, bss->ts)) continue; - __cfg80211_unlink_bss(dev, bss); - expired = true; + if (__cfg80211_unlink_bss(dev, bss)) + expired = true; } if (expired) @@ -82,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) union iwreq_data wrqu; #endif - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); request = rdev->scan_req; @@ -143,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); ___cfg80211_scan_done(rdev, false); - cfg80211_unlock_rdev(rdev); + mutex_unlock(&rdev->sched_scan_mtx); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) @@ -234,15 +321,16 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, return 0; } -/* must hold dev->bss_lock! 
*/ void cfg80211_bss_age(struct cfg80211_registered_device *dev, unsigned long age_secs) { struct cfg80211_internal_bss *bss; unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); + spin_lock_bh(&dev->bss_lock); list_for_each_entry(bss, &dev->bss_list, list) bss->ts -= age_jiffies; + spin_unlock_bh(&dev->bss_lock); } void cfg80211_bss_expire(struct cfg80211_registered_device *dev) @@ -277,40 +365,24 @@ const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, if (!pos) return NULL; - if (end - pos < sizeof(*ie)) - return NULL; - ie = (struct ieee80211_vendor_ie *)pos; + + /* make sure we can access ie->len */ + BUILD_BUG_ON(offsetof(struct ieee80211_vendor_ie, len) != 1); + + if (ie->len < sizeof(*ie)) + goto cont; + ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2]; if (ie_oui == oui && ie->oui_type == oui_type) return pos; - +cont: pos += 2 + ie->len; } return NULL; } EXPORT_SYMBOL(cfg80211_find_vendor_ie); -static int cmp_ies(u8 num, const u8 *ies1, int len1, const u8 *ies2, int len2) -{ - const u8 *ie1 = cfg80211_find_ie(num, ies1, len1); - const u8 *ie2 = cfg80211_find_ie(num, ies2, len2); - - /* equal if both missing */ - if (!ie1 && !ie2) - return 0; - /* sort missing IE before (left of) present IE */ - if (!ie1) - return -1; - if (!ie2) - return 1; - - /* sort by length first, then by contents */ - if (ie1[1] != ie2[1]) - return ie2[1] - ie1[1]; - return memcmp(ie1 + 2, ie2 + 2, ie1[1]); -} - static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, const u8 *ssid, size_t ssid_len) { @@ -334,109 +406,30 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, return memcmp(ssidie + 2, ssid, ssid_len) == 0; } -static bool is_mesh_bss(struct cfg80211_bss *a) -{ - const struct cfg80211_bss_ies *ies; - const u8 *ie; - - if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) - return false; - - ies = rcu_access_pointer(a->ies); - if (!ies) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_ID, ies->data, ies->len); - if (!ie) 
- return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, ies->data, ies->len); - if (!ie) - return false; - - return true; -} - -static bool is_mesh(struct cfg80211_bss *a, - const u8 *meshid, size_t meshidlen, - const u8 *meshcfg) -{ - const struct cfg80211_bss_ies *ies; - const u8 *ie; - - if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) - return false; - - ies = rcu_access_pointer(a->ies); - if (!ies) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_ID, ies->data, ies->len); - if (!ie) - return false; - if (ie[1] != meshidlen) - return false; - if (memcmp(ie + 2, meshid, meshidlen)) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, ies->data, ies->len); - if (!ie) - return false; - if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) - return false; - - /* - * Ignore mesh capability (last two bytes of the IE) when - * comparing since that may differ between stations taking - * part in the same mesh. - */ - return memcmp(ie + 2, meshcfg, - sizeof(struct ieee80211_meshconf_ie) - 2) == 0; -} +/** + * enum bss_compare_mode - BSS compare mode + * @BSS_CMP_REGULAR: regular compare mode (for insertion and normal find) + * @BSS_CMP_HIDE_ZLEN: find hidden SSID with zero-length mode + * @BSS_CMP_HIDE_NUL: find hidden SSID with NUL-ed out mode + */ +enum bss_compare_mode { + BSS_CMP_REGULAR, + BSS_CMP_HIDE_ZLEN, + BSS_CMP_HIDE_NUL, +}; -static int cmp_bss_core(struct cfg80211_bss *a, struct cfg80211_bss *b) +static int cmp_bss(struct cfg80211_bss *a, + struct cfg80211_bss *b, + enum bss_compare_mode mode) { const struct cfg80211_bss_ies *a_ies, *b_ies; - int r; + const u8 *ie1 = NULL; + const u8 *ie2 = NULL; + int i, r; if (a->channel != b->channel) return b->channel->center_freq - a->channel->center_freq; - if (is_mesh_bss(a) && is_mesh_bss(b)) { - a_ies = rcu_access_pointer(a->ies); - if (!a_ies) - return -1; - b_ies = rcu_access_pointer(b->ies); - if (!b_ies) - return 1; - - r = cmp_ies(WLAN_EID_MESH_ID, - a_ies->data, a_ies->len, - b_ies->data, 
b_ies->len); - if (r) - return r; - return cmp_ies(WLAN_EID_MESH_CONFIG, - a_ies->data, a_ies->len, - b_ies->data, b_ies->len); - } - - /* - * we can't use compare_ether_addr here since we need a < > operator. - * The binary return value of compare_ether_addr isn't enough - */ - return memcmp(a->bssid, b->bssid, sizeof(a->bssid)); -} - -static int cmp_bss(struct cfg80211_bss *a, - struct cfg80211_bss *b) -{ - const struct cfg80211_bss_ies *a_ies, *b_ies; - int r; - - r = cmp_bss_core(a, b); - if (r) - return r; - a_ies = rcu_access_pointer(a->ies); if (!a_ies) return -1; @@ -444,42 +437,51 @@ static int cmp_bss(struct cfg80211_bss *a, if (!b_ies) return 1; - return cmp_ies(WLAN_EID_SSID, - a_ies->data, a_ies->len, - b_ies->data, b_ies->len); -} - -static int cmp_hidden_bss(struct cfg80211_bss *a, struct cfg80211_bss *b) -{ - const struct cfg80211_bss_ies *a_ies, *b_ies; - const u8 *ie1; - const u8 *ie2; - int i; - int r; + if (WLAN_CAPABILITY_IS_STA_BSS(a->capability)) + ie1 = cfg80211_find_ie(WLAN_EID_MESH_ID, + a_ies->data, a_ies->len); + if (WLAN_CAPABILITY_IS_STA_BSS(b->capability)) + ie2 = cfg80211_find_ie(WLAN_EID_MESH_ID, + b_ies->data, b_ies->len); + if (ie1 && ie2) { + int mesh_id_cmp; + + if (ie1[1] == ie2[1]) + mesh_id_cmp = memcmp(ie1 + 2, ie2 + 2, ie1[1]); + else + mesh_id_cmp = ie2[1] - ie1[1]; + + ie1 = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + a_ies->data, a_ies->len); + ie2 = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + b_ies->data, b_ies->len); + if (ie1 && ie2) { + if (mesh_id_cmp) + return mesh_id_cmp; + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + return memcmp(ie1 + 2, ie2 + 2, ie1[1]); + } + } - r = cmp_bss_core(a, b); + /* + * we can't use compare_ether_addr here since we need a < > operator. 
+ * The binary return value of compare_ether_addr isn't enough + */ + r = memcmp(a->bssid, b->bssid, sizeof(a->bssid)); if (r) return r; - a_ies = rcu_access_pointer(a->ies); - if (!a_ies) - return -1; - b_ies = rcu_access_pointer(b->ies); - if (!b_ies) - return 1; - ie1 = cfg80211_find_ie(WLAN_EID_SSID, a_ies->data, a_ies->len); ie2 = cfg80211_find_ie(WLAN_EID_SSID, b_ies->data, b_ies->len); + if (!ie1 && !ie2) + return 0; + /* - * Key comparator must use same algorithm in any rb-tree - * search function (order is important), otherwise ordering - * of items in the tree is broken and search gives incorrect - * results. This code uses same order as cmp_ies() does. - * - * Note that due to the differring behaviour with hidden SSIDs - * this function only works when "b" is the tree element and - * "a" is the key we're looking for. + * Note that with "hide_ssid", the function returns a match if + * the already-present BSS ("b") is a hidden SSID beacon for + * the new BSS ("a"). */ /* sort missing IE before (left of) present IE */ @@ -488,24 +490,36 @@ static int cmp_hidden_bss(struct cfg80211_bss *a, struct cfg80211_bss *b) if (!ie2) return 1; - /* zero-size SSID is used as an indication of the hidden bss */ - if (!ie2[1]) + switch (mode) { + case BSS_CMP_HIDE_ZLEN: + /* + * In ZLEN mode we assume the BSS entry we're + * looking for has a zero-length SSID. So if + * the one we're looking at right now has that, + * return 0. Otherwise, return the difference + * in length, but since we're looking for the + * 0-length it's really equivalent to returning + * the length of the one we're looking at. + * + * No content comparison is needed as we assume + * the content length is zero. 
+ */ + return ie2[1]; + case BSS_CMP_REGULAR: + default: + /* sort by length first, then by contents */ + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + return memcmp(ie1 + 2, ie2 + 2, ie1[1]); + case BSS_CMP_HIDE_NUL: + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + /* this is equivalent to memcmp(zeroes, ie2 + 2, len) */ + for (i = 0; i < ie2[1]; i++) + if (ie2[i + 2]) + return -1; return 0; - - /* sort by length first, then by contents */ - if (ie1[1] != ie2[1]) - return ie2[1] - ie1[1]; - - /* - * zeroed SSID ie is another indication of a hidden bss; - * if it isn't zeroed just return the regular sort value - * to find the next candidate - */ - for (i = 0; i < ie2[1]; i++) - if (ie2[i + 2]) - return memcmp(ie1 + 2, ie2 + 2, ie1[1]); - - return 0; + } } struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, @@ -534,7 +548,7 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, continue; if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { res = bss; - kref_get(&res->ref); + bss_ref_get(dev, res); break; } } @@ -547,34 +561,6 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_get_bss); -struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy, - struct ieee80211_channel *channel, - const u8 *meshid, size_t meshidlen, - const u8 *meshcfg) -{ - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); - struct cfg80211_internal_bss *bss, *res = NULL; - - spin_lock_bh(&dev->bss_lock); - - list_for_each_entry(bss, &dev->bss_list, list) { - if (channel && bss->pub.channel != channel) - continue; - if (is_mesh(&bss->pub, meshid, meshidlen, meshcfg)) { - res = bss; - kref_get(&res->ref); - break; - } - } - - spin_unlock_bh(&dev->bss_lock); - if (!res) - return NULL; - return &res->pub; -} -EXPORT_SYMBOL(cfg80211_get_mesh); - - static void rb_insert_bss(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *bss) { @@ -587,7 +573,7 @@ static void rb_insert_bss(struct cfg80211_registered_device *dev, 
parent = *p; tbss = rb_entry(parent, struct cfg80211_internal_bss, rbn); - cmp = cmp_bss(&bss->pub, &tbss->pub); + cmp = cmp_bss(&bss->pub, &tbss->pub, BSS_CMP_REGULAR); if (WARN_ON(!cmp)) { /* will sort of leak this BSS */ @@ -606,7 +592,8 @@ static void rb_insert_bss(struct cfg80211_registered_device *dev, static struct cfg80211_internal_bss * rb_find_bss(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res) + struct cfg80211_internal_bss *res, + enum bss_compare_mode mode) { struct rb_node *n = dev->bss_tree.rb_node; struct cfg80211_internal_bss *bss; @@ -614,7 +601,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, while (n) { bss = rb_entry(n, struct cfg80211_internal_bss, rbn); - r = cmp_bss(&res->pub, &bss->pub); + r = cmp_bss(&res->pub, &bss->pub, mode); if (r == 0) return bss; @@ -627,46 +614,67 @@ rb_find_bss(struct cfg80211_registered_device *dev, return NULL; } -static struct cfg80211_internal_bss * -rb_find_hidden_bss(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res) +static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *new) { - struct rb_node *n = dev->bss_tree.rb_node; + const struct cfg80211_bss_ies *ies; struct cfg80211_internal_bss *bss; - int r; + const u8 *ie; + int i, ssidlen; + u8 fold = 0; - while (n) { - bss = rb_entry(n, struct cfg80211_internal_bss, rbn); - r = cmp_hidden_bss(&res->pub, &bss->pub); + ies = rcu_access_pointer(new->pub.beacon_ies); + if (WARN_ON(!ies)) + return false; - if (r == 0) - return bss; - else if (r < 0) - n = n->rb_left; - else - n = n->rb_right; + ie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + if (!ie) { + /* nothing to do */ + return true; } - return NULL; -} + ssidlen = ie[1]; + for (i = 0; i < ssidlen; i++) + fold |= ie[2 + i]; -static void -copy_hidden_ies(struct cfg80211_internal_bss *res, - struct cfg80211_internal_bss *hidden) -{ - const struct cfg80211_bss_ies *ies; + if (fold) { + /* 
not a hidden SSID */ + return true; + } - if (rcu_access_pointer(res->pub.beacon_ies)) - return; + /* This is the bad part ... */ - ies = rcu_access_pointer(hidden->pub.beacon_ies); - if (WARN_ON(!ies)) - return; + list_for_each_entry(bss, &dev->bss_list, list) { + if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) + continue; + if (bss->pub.channel != new->pub.channel) + continue; + if (rcu_access_pointer(bss->pub.beacon_ies)) + continue; + ies = rcu_access_pointer(bss->pub.ies); + if (!ies) + continue; + ie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + if (!ie) + continue; + if (ssidlen && ie[1] != ssidlen) + continue; + /* that would be odd ... */ + if (bss->pub.beacon_ies) + continue; + if (WARN_ON_ONCE(bss->pub.hidden_beacon_bss)) + continue; + if (WARN_ON_ONCE(!list_empty(&bss->hidden_list))) + list_del(&bss->hidden_list); + /* combine them */ + list_add(&bss->hidden_list, &new->hidden_list); + bss->pub.hidden_beacon_bss = &new->pub; + new->refcount += bss->refcount; + rcu_assign_pointer(bss->pub.beacon_ies, + new->pub.beacon_ies); + } - ies = kmemdup(ies, sizeof(*ies) + ies->len, GFP_ATOMIC); - if (unlikely(!ies)) - return; - rcu_assign_pointer(res->pub.beacon_ies, ies); + return true; } static struct cfg80211_internal_bss * @@ -687,15 +695,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, return NULL; } - found = rb_find_bss(dev, tmp); + found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR); if (found) { - found->pub.beacon_interval = tmp->pub.beacon_interval; - found->pub.tsf = tmp->pub.tsf; - found->pub.signal = tmp->pub.signal; - found->pub.capability = tmp->pub.capability; - found->ts = tmp->ts; - /* Update IEs */ if (rcu_access_pointer(tmp->pub.proberesp_ies)) { const struct cfg80211_bss_ies *old; @@ -711,41 +713,65 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } else if (rcu_access_pointer(tmp->pub.beacon_ies)) { - const struct cfg80211_bss_ies *old, *ies; + 
const struct cfg80211_bss_ies *old; + struct cfg80211_internal_bss *bss; + + if (found->pub.hidden_beacon_bss && + !list_empty(&found->hidden_list)) { + const struct cfg80211_bss_ies *f; + + /* + * The found BSS struct is one of the probe + * response members of a group, but we're + * receiving a beacon (beacon_ies in the tmp + * bss is used). This can only mean that the + * AP changed its beacon from not having an + * SSID to showing it, which is confusing so + * drop this information. + */ + + f = rcu_access_pointer(tmp->pub.beacon_ies); + kfree_rcu((struct cfg80211_bss_ies *)f, + rcu_head); + goto drop; + } old = rcu_access_pointer(found->pub.beacon_ies); - ies = rcu_access_pointer(found->pub.ies); rcu_assign_pointer(found->pub.beacon_ies, tmp->pub.beacon_ies); /* Override IEs if they were from a beacon before */ - if (old == ies) + if (old == rcu_access_pointer(found->pub.ies)) rcu_assign_pointer(found->pub.ies, tmp->pub.beacon_ies); + /* Assign beacon IEs to all sub entries */ + list_for_each_entry(bss, &found->hidden_list, + hidden_list) { + const struct cfg80211_bss_ies *ies; + + ies = rcu_access_pointer(bss->pub.beacon_ies); + WARN_ON(ies != old); + + rcu_assign_pointer(bss->pub.beacon_ies, + tmp->pub.beacon_ies); + } + if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } + + found->pub.beacon_interval = tmp->pub.beacon_interval; + found->pub.signal = tmp->pub.signal; + found->pub.capability = tmp->pub.capability; + found->ts = tmp->ts; } else { struct cfg80211_internal_bss *new; struct cfg80211_internal_bss *hidden; struct cfg80211_bss_ies *ies; - /* First check if the beacon is a probe response from - * a hidden bss. If so, copy beacon ies (with nullified - * ssid) into the probe response bss entry (with real ssid). - * It is required basically for PSM implementation - * (probe responses do not contain tim ie) */ - - /* TODO: The code is not trying to update existing probe - * response bss entries when beacon ies are - * getting changed. 
*/ - hidden = rb_find_hidden_bss(dev, tmp); - if (hidden) - copy_hidden_ies(tmp, hidden); - /* * create a copy -- the "res" variable that is passed in * is allocated on the stack since it's not needed in the @@ -760,21 +786,51 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, ies = (void *)rcu_dereference(tmp->pub.proberesp_ies); if (ies) kfree_rcu(ies, rcu_head); - spin_unlock_bh(&dev->bss_lock); - return NULL; + goto drop; } memcpy(new, tmp, sizeof(*new)); - kref_init(&new->ref); + new->refcount = 1; + INIT_LIST_HEAD(&new->hidden_list); + + if (rcu_access_pointer(tmp->pub.proberesp_ies)) { + hidden = rb_find_bss(dev, tmp, BSS_CMP_HIDE_ZLEN); + if (!hidden) + hidden = rb_find_bss(dev, tmp, + BSS_CMP_HIDE_NUL); + if (hidden) { + new->pub.hidden_beacon_bss = &hidden->pub; + list_add(&new->hidden_list, + &hidden->hidden_list); + hidden->refcount++; + rcu_assign_pointer(new->pub.beacon_ies, + hidden->pub.beacon_ies); + } + } else { + /* + * Ok so we found a beacon, and don't have an entry. If + * it's a beacon with hidden SSID, we might be in for an + * expensive search for any probe responses that should + * be grouped with this beacon for updates ... 
+ */ + if (!cfg80211_combine_bsses(dev, new)) { + kfree(new); + goto drop; + } + } + list_add_tail(&new->list, &dev->bss_list); rb_insert_bss(dev, new); found = new; } dev->bss_generation++; + bss_ref_get(dev, found); spin_unlock_bh(&dev->bss_lock); - kref_get(&found->ref); return found; + drop: + spin_unlock_bh(&dev->bss_lock); + return NULL; } static struct ieee80211_channel * @@ -833,7 +889,6 @@ cfg80211_inform_bss(struct wiphy *wiphy, memcpy(tmp.pub.bssid, bssid, ETH_ALEN); tmp.pub.channel = channel; tmp.pub.signal = signal; - tmp.pub.tsf = tsf; tmp.pub.beacon_interval = beacon_interval; tmp.pub.capability = capability; /* @@ -841,16 +896,14 @@ cfg80211_inform_bss(struct wiphy *wiphy, * Response frame, we need to pick one of the options and only use it * with the driver that does not provide the full Beacon/Probe Response * frame. Use Beacon frame pointer to avoid indicating that this should - * override the iies pointer should we have received an earlier + * override the IEs pointer should we have received an earlier * indication of Probe Response data. - * - * The initial buffer for the IEs is allocated with the BSS entry and - * is located after the private area. 
*/ ies = kmalloc(sizeof(*ies) + ielen, gfp); if (!ies) return NULL; ies->len = ielen; + ies->tsf = tsf; memcpy(ies->data, ie, ielen); rcu_assign_pointer(tmp.pub.beacon_ies, ies); @@ -907,6 +960,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, if (!ies) return NULL; ies->len = ielen; + ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); memcpy(ies->data, mgmt->u.probe_resp.variable, ielen); if (ieee80211_is_probe_resp(mgmt->frame_control)) @@ -918,7 +972,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN); tmp.pub.channel = channel; tmp.pub.signal = signal; - tmp.pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); @@ -935,27 +988,35 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_inform_bss_frame); -void cfg80211_ref_bss(struct cfg80211_bss *pub) +void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { + struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) return; bss = container_of(pub, struct cfg80211_internal_bss, pub); - kref_get(&bss->ref); + + spin_lock_bh(&dev->bss_lock); + bss_ref_get(dev, bss); + spin_unlock_bh(&dev->bss_lock); } EXPORT_SYMBOL(cfg80211_ref_bss); -void cfg80211_put_bss(struct cfg80211_bss *pub) +void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { + struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) return; bss = container_of(pub, struct cfg80211_internal_bss, pub); - kref_put(&bss->ref, bss_release); + + spin_lock_bh(&dev->bss_lock); + bss_ref_put(dev, bss); + spin_unlock_bh(&dev->bss_lock); } EXPORT_SYMBOL(cfg80211_put_bss); @@ -971,8 +1032,8 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) spin_lock_bh(&dev->bss_lock); if (!list_empty(&bss->list)) { - 
__cfg80211_unlink_bss(dev, bss); - dev->bss_generation++; + if (__cfg80211_unlink_bss(dev, bss)) + dev->bss_generation++; } spin_unlock_bh(&dev->bss_lock); } @@ -1001,6 +1062,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto out; @@ -1107,6 +1169,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, dev_hold(dev); } out: + mutex_unlock(&rdev->sched_scan_mtx); kfree(creq); cfg80211_unlock_rdev(rdev); return err; @@ -1155,16 +1218,6 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, } } -static inline unsigned int elapsed_jiffies_msecs(unsigned long start) -{ - unsigned long end = jiffies; - - if (end >= start) - return jiffies_to_msecs(end - start); - - return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); -} - static char * ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, struct cfg80211_internal_bss *bss, char *current_ev, @@ -1241,15 +1294,10 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, rcu_read_lock(); ies = rcu_dereference(bss->pub.ies); - if (ies) { - rem = ies->len; - ie = ies->data; - } else { - rem = 0; - ie = NULL; - } + rem = ies->len; + ie = ies->data; - while (ies && rem >= 2) { + while (rem >= 2) { /* invalid data */ if (ie[1] > rem - 2) break; @@ -1358,11 +1406,11 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, &iwe, IW_EV_UINT_LEN); } - buf = kmalloc(30, GFP_ATOMIC); + buf = kmalloc(31, GFP_ATOMIC); if (buf) { memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->pub.tsf)); + sprintf(buf, "tsf=%016llx", (unsigned long long)(ies->tsf)); iwe.u.data.length = strlen(buf); current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe, buf); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f2431e4..482c70e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -85,6 +85,7 @@ 
static int cfg80211_conn_scan(struct wireless_dev *wdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (rdev->scan_req) return -EBUSY; @@ -192,7 +193,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) prev_bssid, params->ssid, params->ssid_len, params->ie, params->ie_len, - false, &params->crypto, + params->mfp != NL80211_MFP_NO, + &params->crypto, params->flags, &params->ht_capa, &params->ht_capa_mask); if (err) @@ -222,6 +224,7 @@ void cfg80211_conn_work(struct work_struct *work) rtnl_lock(); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); @@ -246,6 +249,7 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); } + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); rtnl_unlock(); @@ -300,7 +304,7 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) bss = cfg80211_get_conn_bss(wdev); if (bss) { - cfg80211_put_bss(bss); + cfg80211_put_bss(&rdev->wiphy, bss); } else { /* not found */ if (wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) @@ -319,11 +323,9 @@ void cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - mutex_lock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); wdev_lock(wdev); __cfg80211_sme_scan_done(dev); wdev_unlock(wdev); - mutex_unlock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); } void cfg80211_sme_rx_auth(struct net_device *dev, @@ -463,7 +465,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; } @@ -479,7 +481,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, kfree(wdev->connect_keys); wdev->connect_keys = NULL;
wdev->ssid_len = 0; - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); return; } @@ -519,10 +521,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, * - country_ie + 2, the start of the country ie data, and * - and country_ie[1] which is the IE length */ - regulatory_hint_11d(wdev->wiphy, - bss->channel->band, - country_ie + 2, - country_ie[1]); + regulatory_hint_11d(wdev->wiphy, bss->channel->band, + country_ie + 2, country_ie[1]); kfree(country_ie); } @@ -587,7 +587,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, } cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; cfg80211_hold_bss(bss_from_pub(bss)); @@ -622,7 +622,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, return; out: - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); } void cfg80211_roamed(struct net_device *dev, @@ -664,7 +664,7 @@ void cfg80211_roamed_bss(struct net_device *dev, ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); if (!ev) { - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); return; } @@ -705,7 +705,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } wdev->current_bss = NULL; @@ -876,7 +876,7 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, if (bss) { wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; err = cfg80211_conn_do_work(wdev); - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); } else { /* otherwise we'll need to scan for the AP first */ err = cfg80211_conn_scan(wdev); @@ -925,9 +925,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, int err; mutex_lock(&rdev->devlist_mtx); + /* might request scan - scan_mtx -> wdev_mtx dependency */ + 
mutex_lock(&rdev->sched_scan_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); wdev_unlock(dev->ieee80211_ptr); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); return err; diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 9bf6d5e..238ee49 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -77,13 +77,11 @@ static void wiphy_dev_release(struct device *dev) cfg80211_dev_free(rdev); } -#ifdef CONFIG_HOTPLUG static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env) { /* TODO, we probably need stuff here */ return 0; } -#endif static int wiphy_suspend(struct device *dev, pm_message_t state) { @@ -108,9 +106,7 @@ static int wiphy_resume(struct device *dev) int ret = 0; /* Age scan results with time spent in suspend */ - spin_lock_bh(&rdev->bss_lock); cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); - spin_unlock_bh(&rdev->bss_lock); if (rdev->ops->resume) { rtnl_lock(); @@ -134,9 +130,7 @@ struct class ieee80211_class = { .owner = THIS_MODULE, .dev_release = wiphy_dev_release, .dev_attrs = ieee80211_dev_attrs, -#ifdef CONFIG_HOTPLUG .dev_uevent = wiphy_uevent, -#endif .suspend = wiphy_suspend, .resume = wiphy_resume, .ns_type = &net_ns_type_operations, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 2134576..7586de7 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -27,7 +27,8 @@ #define WIPHY_PR_ARG __entry->wiphy_name #define WDEV_ENTRY __field(u32, id) -#define WDEV_ASSIGN (__entry->id) = (wdev ? wdev->identifier : 0) +#define WDEV_ASSIGN (__entry->id) = (!IS_ERR_OR_NULL(wdev) \ + ? 
wdev->identifier : 0) #define WDEV_PR_FMT "wdev(%u)" #define WDEV_PR_ARG (__entry->id) @@ -1767,6 +1768,24 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_set_mac_acl, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_acl_data *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, acl_policy) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->acl_policy = params->acl_policy; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -2033,6 +2052,21 @@ TRACE_EVENT(cfg80211_reg_can_beacon, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_chandef_dfs_required, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef), @@ -2049,6 +2083,36 @@ TRACE_EVENT(cfg80211_ch_switch_notify, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_radar_event, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + +TRACE_EVENT(cfg80211_cac_event, + TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt), + TP_ARGS(netdev, evt), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(enum nl80211_radar_event, 
evt) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->evt = evt; + ), + TP_printk(NETDEV_PR_FMT ", event: %d", + NETDEV_PR_ARG, __entry->evt) +); + DECLARE_EVENT_CLASS(cfg80211_rx_evt, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr), @@ -2315,6 +2379,41 @@ TRACE_EVENT(cfg80211_return_u32, TP_printk("ret: %u", __entry->ret) ); +TRACE_EVENT(cfg80211_report_wowlan_wakeup, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup), + TP_ARGS(wiphy, wdev, wakeup), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(bool, disconnect) + __field(bool, magic_pkt) + __field(bool, gtk_rekey_failure) + __field(bool, eap_identity_req) + __field(bool, four_way_handshake) + __field(bool, rfkill_release) + __field(s32, pattern_idx) + __field(u32, packet_len) + __dynamic_array(u8, packet, wakeup->packet_present_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->disconnect = wakeup->disconnect; + __entry->magic_pkt = wakeup->magic_pkt; + __entry->gtk_rekey_failure = wakeup->gtk_rekey_failure; + __entry->eap_identity_req = wakeup->eap_identity_req; + __entry->four_way_handshake = wakeup->four_way_handshake; + __entry->rfkill_release = wakeup->rfkill_release; + __entry->pattern_idx = wakeup->pattern_idx; + __entry->packet_len = wakeup->packet_len; + if (wakeup->packet && wakeup->packet_present_len) + memcpy(__get_dynamic_array(packet), wakeup->packet, + wakeup->packet_present_len); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 16d76a8..37a56ee 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1184,7 +1184,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) 
+ enum cfg80211_chan_mode chanmode, + u8 radar_detect) { struct wireless_dev *wdev_iter; u32 used_iftypes = BIT(iftype); @@ -1195,14 +1196,46 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; + bool radar_required; int i, j; ASSERT_RTNL(); lockdep_assert_held(&rdev->devlist_mtx); + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_WDS: + radar_required = !!(chan && + (chan->flags & IEEE80211_CHAN_RADAR)); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_MONITOR: + radar_required = false; + break; + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_UNSPECIFIED: + default: + return -EINVAL; + } + + if (radar_required && !radar_detect) + return -EINVAL; + /* Always allow software iftypes */ - if (rdev->wiphy.software_iftypes & BIT(iftype)) + if (rdev->wiphy.software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; return 0; + } memset(num, 0, sizeof(num)); memset(used_channels, 0, sizeof(used_channels)); @@ -1275,7 +1308,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, used_iftypes |= BIT(wdev_iter->iftype); } - if (total == 1) + if (total == 1 && !radar_detect) return 0; for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { @@ -1308,6 +1341,9 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, } } + if (radar_detect && !(c->radar_detect_widths & radar_detect)) + goto cont; + /* * Finally check that all iftypes that we're currently * using are actually part of this combination. 
If they diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c index 8bafa31..e98a01c 100644 --- a/net/wireless/wext-proc.c +++ b/net/wireless/wext-proc.c @@ -143,7 +143,8 @@ static const struct file_operations wireless_seq_fops = { int __net_init wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) + if (!proc_create("wireless", S_IRUGO, net->proc_net, + &wireless_seq_fops)) return -ENOMEM; return 0; @@ -151,5 +152,5 @@ int __net_init wext_proc_init(struct net *net) void __net_exit wext_proc_exit(struct net *net) { - proc_net_remove(net, "wireless"); + remove_proc_entry("wireless", net->proc_net); } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index fb9622f..e79cb5c 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -89,6 +89,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -135,6 +136,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -190,6 +192,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); err = 0; @@ -223,6 +226,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -285,6 +289,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if 
(wdev->sme_state != CFG80211_SME_IDLE) { @@ -313,6 +318,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; diff --git a/net/x25/Kconfig b/net/x25/Kconfig index e6759c9..c959312c 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -3,8 +3,7 @@ # config X25 - tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)" - depends on EXPERIMENTAL + tristate "CCITT X.25 Packet Layer" ---help--- X.25 is a set of standardized network protocols, similar in scope to frame relay; the one physical line from your box to the X.25 network diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a306bc6..37ca969 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -208,11 +208,10 @@ static void x25_remove_socket(struct sock *sk) static void x25_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; write_lock_bh(&x25_list_lock); - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->neighbour && x25_sk(s)->neighbour->dev == dev) x25_disconnect(s, ENETUNREACH, 0, 0); @@ -280,12 +279,11 @@ static struct sock *x25_find_listener(struct x25_address *addr, { struct sock *s; struct sock *next_best; - struct hlist_node *node; read_lock_bh(&x25_list_lock); next_best = NULL; - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || !strcmp(addr->x25_addr, @@ -323,9 +321,8 @@ found: static struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) { sock_hold(s); goto found; @@ -1782,11 +1779,10 @@ static struct notifier_block x25_dev_notifier = { void x25_kill_by_neigh(struct x25_neigh *nb) { struct sock *s; - struct 
hlist_node *node; write_lock_bh(&x25_list_lock); - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->neighbour == nb) x25_disconnect(s, ENETUNREACH, 0, 0); diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index ce90b8d..bda1a13 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -21,8 +21,8 @@ config XFRM_USER If unsure, say Y. config XFRM_SUB_POLICY - bool "Transformation sub policy support (EXPERIMENTAL)" - depends on XFRM && EXPERIMENTAL + bool "Transformation sub policy support" + depends on XFRM ---help--- Support sub policy for developers. By using sub policy with main one, two policies can be applied to the same packet at once. @@ -31,8 +31,8 @@ config XFRM_SUB_POLICY If unsure, say N. config XFRM_MIGRATE - bool "Transformation migrate database (EXPERIMENTAL)" - depends on XFRM && EXPERIMENTAL + bool "Transformation migrate database" + depends on XFRM ---help--- A feature to update locator(s) of a given IPsec security association dynamically. This feature is required, for @@ -42,8 +42,8 @@ config XFRM_MIGRATE If unsure, say N. config XFRM_STATISTICS - bool "Transformation statistics (EXPERIMENTAL)" - depends on INET && XFRM && PROC_FS && EXPERIMENTAL + bool "Transformation statistics" + depends on INET && XFRM && PROC_FS ---help--- This statistics is not a SNMP/MIB specification but shows statistics about transformation error (or almost error) factor @@ -68,8 +68,8 @@ config NET_KEY Say Y unless you know what you are doing. config NET_KEY_MIGRATE - bool "PF_KEY MIGRATE (EXPERIMENTAL)" - depends on NET_KEY && EXPERIMENTAL + bool "PF_KEY MIGRATE" + depends on NET_KEY select XFRM_MIGRATE ---help--- Add a PF_KEY MIGRATE message to PF_KEYv2 socket family. 
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 4ce2d93..6fb9d00 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -35,6 +35,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8, .sadb_alg_ivlen = 8, @@ -51,6 +53,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12, .sadb_alg_ivlen = 8, @@ -67,6 +71,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16, .sadb_alg_ivlen = 8, @@ -83,6 +89,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8, .sadb_alg_ivlen = 8, @@ -99,6 +107,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12, .sadb_alg_ivlen = 8, @@ -115,6 +125,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16, .sadb_alg_ivlen = 8, @@ -131,6 +143,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC, .sadb_alg_ivlen = 8, @@ -151,6 +165,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_NULL, .sadb_alg_ivlen = 0, @@ -169,6 +185,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_MD5HMAC, .sadb_alg_ivlen = 0, @@ -187,6 +205,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_SHA1HMAC, .sadb_alg_ivlen = 0, @@ -205,6 +225,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, .sadb_alg_ivlen = 0, @@ -222,6 +244,8 @@ static struct 
xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_384HMAC, .sadb_alg_ivlen = 0, @@ -239,6 +263,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_512HMAC, .sadb_alg_ivlen = 0, @@ -257,6 +283,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, .sadb_alg_ivlen = 0, @@ -274,6 +302,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC, .sadb_alg_ivlen = 0, @@ -295,6 +325,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_NULL, .sadb_alg_ivlen = 0, @@ -313,6 +345,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_DESCBC, .sadb_alg_ivlen = 8, @@ -331,6 +365,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_3DESCBC, .sadb_alg_ivlen = 8, @@ -349,6 +385,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_CASTCBC, .sadb_alg_ivlen = 8, @@ -367,6 +405,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, .sadb_alg_ivlen = 8, @@ -385,6 +425,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AESCBC, .sadb_alg_ivlen = 8, @@ -403,6 +445,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_SERPENTCBC, .sadb_alg_ivlen = 8, @@ -421,6 +465,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_CAMELLIACBC, .sadb_alg_ivlen = 8, @@ -439,6 +485,8 @@ static struct xfrm_algo_desc 
ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_TWOFISHCBC, .sadb_alg_ivlen = 8, @@ -456,6 +504,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AESCTR, .sadb_alg_ivlen = 8, @@ -473,6 +523,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE } }, { @@ -482,6 +533,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZS } }, { @@ -491,6 +543,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 50, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZJH } }, }; @@ -700,8 +753,7 @@ void xfrm_probe_algs(void) } for (i = 0; i < ealg_entries(); i++) { - status = crypto_has_blkcipher(ealg_list[i].name, 0, - CRYPTO_ALG_ASYNC); + status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0); if (ealg_list[i].available != status) ealg_list[i].available = status; } @@ -715,27 +767,27 @@ void xfrm_probe_algs(void) } EXPORT_SYMBOL_GPL(xfrm_probe_algs); -int xfrm_count_auth_supported(void) +int xfrm_count_pfkey_auth_supported(void) { int i, n; for (i = 0, n = 0; i < aalg_entries(); i++) - if (aalg_list[i].available) + if (aalg_list[i].available && aalg_list[i].pfkey_supported) n++; return n; } -EXPORT_SYMBOL_GPL(xfrm_count_auth_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_auth_supported); -int xfrm_count_enc_supported(void) +int xfrm_count_pfkey_enc_supported(void) { int i, n; for (i = 0, n = 0; i < ealg_entries(); i++) - if (ealg_list[i].available) + if (ealg_list[i].available && ealg_list[i].pfkey_supported) n++; return n; } -EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported); #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) diff --git a/net/xfrm/xfrm_output.c 
b/net/xfrm/xfrm_output.c index 95a338c..bcfda89 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -61,6 +61,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err) } spin_lock_bh(&x->lock); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); + goto error; + } + err = xfrm_state_check_expire(x); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 41eabc4..167c67d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -35,6 +35,10 @@ #include "xfrm_hash.h" +#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) +#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) +#define XFRM_MAX_QUEUE_LEN 100 + DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -51,7 +55,7 @@ static struct kmem_cache *xfrm_dst_cache __read_mostly; static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); - +static void xfrm_policy_queue_process(unsigned long arg); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); @@ -287,8 +291,11 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); atomic_set(&policy->refcnt, 1); + skb_queue_head_init(&policy->polq.hold_queue); setup_timer(&policy->timer, xfrm_policy_timer, (unsigned long)policy); + setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, + (unsigned long)policy); policy->flo.ops = &xfrm_policy_fc_ops; } return policy; @@ -309,6 +316,16 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) } EXPORT_SYMBOL(xfrm_policy_destroy); +static void xfrm_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(list)) != NULL) { + dev_put(skb->dev); + kfree_skb(skb); + } +} + /* Rule must be locked. 
Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. */ @@ -319,6 +336,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) atomic_inc(&policy->genid); + del_timer(&policy->polq.hold_timer); + xfrm_queue_purge(&policy->polq.hold_queue); + if (del_timer(&policy->timer)) xfrm_pol_put(policy); @@ -359,27 +379,27 @@ static void xfrm_dst_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp, *entry0 = NULL; + struct hlist_node *tmp, *entry0 = NULL; struct xfrm_policy *pol; unsigned int h0 = 0; redo: - hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(pol, tmp, list, bydst) { unsigned int h; h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask); if (!entry0) { - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_head(&pol->bydst, ndsttable+h); h0 = h; } else { if (h != h0) continue; - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_after(entry0, &pol->bydst); } - entry0 = entry; + entry0 = &pol->bydst; } if (!hlist_empty(list)) { entry0 = NULL; @@ -391,10 +411,10 @@ static void xfrm_idx_hash_transfer(struct hlist_head *list, struct hlist_head *nidxtable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_policy *pol; - hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + hlist_for_each_entry_safe(pol, tmp, list, byidx) { unsigned int h; h = __idx_hash(pol->index, nhashmask); @@ -524,7 +544,6 @@ static u32 xfrm_gen_index(struct net *net, int dir) static u32 idx_generator; for (;;) { - struct hlist_node *entry; struct hlist_head *list; struct xfrm_policy *p; u32 idx; @@ -536,7 +555,7 @@ static u32 xfrm_gen_index(struct net *net, int dir) idx = 8; list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; - hlist_for_each_entry(p, entry, list, byidx) { + hlist_for_each_entry(p, list, byidx) { if (p->index == 
idx) { found = 1; break; @@ -562,23 +581,62 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s return 0; } +static void xfrm_policy_requeue(struct xfrm_policy *old, + struct xfrm_policy *new) +{ + struct xfrm_policy_queue *pq = &old->polq; + struct sk_buff_head list; + + __skb_queue_head_init(&list); + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice_init(&pq->hold_queue, &list); + del_timer(&pq->hold_timer); + spin_unlock_bh(&pq->hold_queue.lock); + + if (skb_queue_empty(&list)) + return; + + pq = &new->polq; + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice(&list, &pq->hold_queue); + pq->timeout = XFRM_QUEUE_TMO_MIN; + mod_timer(&pq->hold_timer, jiffies); + spin_unlock_bh(&pq->hold_queue.lock); +} + +static bool xfrm_policy_mark_match(struct xfrm_policy *policy, + struct xfrm_policy *pol) +{ + u32 mark = policy->mark.v & policy->mark.m; + + if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) + return true; + + if ((mark & pol->mark.m) == pol->mark.v && + policy->priority == pol->priority) + return true; + + return false; +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { struct net *net = xp_net(policy); struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; - struct hlist_node *entry, *newpos; - u32 mark = policy->mark.v & policy->mark.m; + struct hlist_node *newpos; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && - (mark & pol->mark.m) == pol->mark.v && + xfrm_policy_mark_match(policy, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { @@ -603,8 +661,10 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) 
net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); rt_genid_bump(net); - if (delpol) + if (delpol) { + xfrm_policy_requeue(delpol, policy); __xfrm_policy_unlink(delpol, dir); + } policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); @@ -630,13 +690,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = 0; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && @@ -668,7 +727,6 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = -ENOENT; if (xfrm_policy_id2dir(id) != dir) @@ -678,7 +736,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, write_lock_bh(&xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; - hlist_for_each_entry(pol, entry, chain, byidx) { + hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); @@ -711,10 +769,9 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; int i; - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; @@ -728,7 +785,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi } } for (i = net->xfrm.policy_bydst[dir].hmask; i >= 
0; i--) { - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) @@ -767,11 +824,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; int i; again1: - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; @@ -791,7 +847,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) @@ -919,7 +975,6 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, int err; struct xfrm_policy *pol, *ret; const xfrm_address_t *daddr, *saddr; - struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; @@ -931,7 +986,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, read_lock_bh(&xfrm_policy_lock); chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -947,7 +1002,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -1115,11 +1170,15 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } - if (old_pol) + if (old_pol) { + if (pol) + 
xfrm_policy_requeue(old_pol, pol); + /* Unlinking succeeds always. This is the only function * allowed to delete or replace socket policy. */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); + } write_unlock_bh(&xfrm_policy_lock); if (old_pol) { @@ -1310,6 +1369,8 @@ static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *f * It means we need to try again resolving. */ if (xdst->num_xfrms > 0) return NULL; + } else if (dst->flags & DST_XFRM_QUEUE) { + return NULL; } else { /* Real bundle */ if (stale_bundle(dst)) @@ -1673,6 +1734,171 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, return xdst; } +static void xfrm_policy_queue_process(unsigned long arg) +{ + int err = 0; + struct sk_buff *skb; + struct sock *sk; + struct dst_entry *dst; + struct net_device *dev; + struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct xfrm_policy_queue *pq = &pol->polq; + struct flowi fl; + struct sk_buff_head list; + + spin_lock(&pq->hold_queue.lock); + skb = skb_peek(&pq->hold_queue); + dst = skb_dst(skb); + sk = skb->sk; + xfrm_decode_session(skb, &fl, dst->ops->family); + spin_unlock(&pq->hold_queue.lock); + + dst_hold(dst->path); + dst = xfrm_lookup(xp_net(pol), dst->path, &fl, + sk, 0); + if (IS_ERR(dst)) + goto purge_queue; + + if (dst->flags & DST_XFRM_QUEUE) { + dst_release(dst); + + if (pq->timeout >= XFRM_QUEUE_TMO_MAX) + goto purge_queue; + + pq->timeout = pq->timeout << 1; + mod_timer(&pq->hold_timer, jiffies + pq->timeout); + return; + } + + dst_release(dst); + + __skb_queue_head_init(&list); + + spin_lock(&pq->hold_queue.lock); + pq->timeout = 0; + skb_queue_splice_init(&pq->hold_queue, &list); + spin_unlock(&pq->hold_queue.lock); + + while (!skb_queue_empty(&list)) { + skb = __skb_dequeue(&list); + + xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + dst_hold(skb_dst(skb)->path); + dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, + &fl, skb->sk, 0); + if (IS_ERR(dst)) { + dev_put(skb->dev); + 
kfree_skb(skb); + continue; + } + + nf_reset(skb); + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + dev = skb->dev; + err = dst_output(skb); + dev_put(dev); + } + + return; + +purge_queue: + pq->timeout = 0; + xfrm_queue_purge(&pq->hold_queue); +} + +static int xdst_queue_output(struct sk_buff *skb) +{ + unsigned long sched_next; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + struct xfrm_policy_queue *pq = &xdst->pols[0]->polq; + + if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { + kfree_skb(skb); + return -EAGAIN; + } + + skb_dst_force(skb); + dev_hold(skb->dev); + + spin_lock_bh(&pq->hold_queue.lock); + + if (!pq->timeout) + pq->timeout = XFRM_QUEUE_TMO_MIN; + + sched_next = jiffies + pq->timeout; + + if (del_timer(&pq->hold_timer)) { + if (time_before(pq->hold_timer.expires, sched_next)) + sched_next = pq->hold_timer.expires; + } + + __skb_queue_tail(&pq->hold_queue, skb); + mod_timer(&pq->hold_timer, sched_next); + + spin_unlock_bh(&pq->hold_queue.lock); + + return 0; +} + +static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, + struct dst_entry *dst, + const struct flowi *fl, + int num_xfrms, + u16 family) +{ + int err; + struct net_device *dev; + struct dst_entry *dst1; + struct xfrm_dst *xdst; + + xdst = xfrm_alloc_dst(net, family); + if (IS_ERR(xdst)) + return xdst; + + if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 || + (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP)) + return xdst; + + dst1 = &xdst->u.dst; + dst_hold(dst); + xdst->route = dst; + + dst_copy_metrics(dst1, dst); + + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; + dst1->flags |= DST_HOST | DST_XFRM_QUEUE; + dst1->lastuse = jiffies; + + dst1->input = dst_discard; + dst1->output = xdst_queue_output; + + dst_hold(dst); + dst1->child = dst; + dst1->path = dst; + + xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); + + err = -ENODEV; + dev = dst->dev; + if (!dev) + goto free_dst; + + err = xfrm_fill_dst(xdst, dev, fl); + if (err) + goto free_dst; + 
+out: + return xdst; + +free_dst: + dst_release(dst1); + xdst = ERR_PTR(err); + goto out; +} + static struct flow_cache_object * xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) @@ -1751,7 +1977,7 @@ make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ - xdst = xfrm_alloc_dst(net, family); + xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); @@ -2359,6 +2585,9 @@ static int xfrm_bundle_ok(struct xfrm_dst *first) (dst->dev && !netif_running(dst->dev))) return 0; + if (dst->flags & DST_XFRM_QUEUE) + return 1; + last = NULL; do { @@ -2656,7 +2885,7 @@ static void xfrm_policy_fini(struct net *net) WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); htab = &net->xfrm.policy_bydst[dir]; - sz = (htab->hmask + 1); + sz = (htab->hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(htab->table)); xfrm_hash_free(htab->table, sz); } @@ -2786,10 +3015,10 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, { if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { if (sel_tgt->family == sel_cmp->family && - xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr, - sel_cmp->family) == 0 && - xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr, - sel_cmp->family) == 0 && + xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, + sel_cmp->family) && + xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, + sel_cmp->family) && sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { return true; @@ -2806,13 +3035,12 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector u8 dir, u8 type) { struct xfrm_policy *pol, *ret = NULL; - struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; 
read_lock_bh(&xfrm_policy_lock); chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; @@ -2821,7 +3049,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector } } chain = &init_net.xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && pol->priority < priority) { @@ -2847,10 +3075,10 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: - if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr, - m->old_family) == 0 && - xfrm_addr_cmp(&t->saddr, &m->old_saddr, - m->old_family) == 0) { + if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, + m->old_family) && + xfrm_addr_equal(&t->saddr, &m->old_saddr, + m->old_family)) { match = 1; } break; @@ -2916,10 +3144,10 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) return -EINVAL; for (i = 0; i < num_migrate; i++) { - if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr, - m[i].old_family) == 0) && - (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr, - m[i].old_family) == 0)) + if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, + m[i].old_family) && + xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, + m[i].old_family)) return -EINVAL; if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index d0a1af8..c721b0d 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -43,6 +43,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", 
LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), + SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_SENTINEL }; @@ -73,13 +74,13 @@ static const struct file_operations xfrm_statistics_seq_fops = { int __net_init xfrm_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "xfrm_stat", S_IRUGO, - &xfrm_statistics_seq_fops)) + if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net, + &xfrm_statistics_seq_fops)) return -ENOMEM; return 0; } void xfrm_proc_fini(struct net *net) { - proc_net_remove(net, "xfrm_stat"); + remove_proc_entry("xfrm_stat", net->proc_net); } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 765f6fe..8dafe6d3 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -242,11 +242,13 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) u32 diff; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; u32 seq = ntohl(net_seq); - u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + u32 pos; if (!replay_esn->replay_window) return; + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + if (seq > replay_esn->seq) { diff = seq - replay_esn->seq; @@ -332,6 +334,70 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) x->xflags &= ~XFRM_TIME_DEFER; } +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) +{ + u32 seq_diff, oseq_diff; + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! 
+ */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff) + break; + + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = ~preplay_esn->seq + replay_esn->seq + 1; + + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq - preplay_esn->oseq; + else + oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1; + + if (seq_diff < x->replay_maxdiff && + oseq_diff < x->replay_maxdiff) { + + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; @@ -508,7 +574,7 @@ static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_bmp, + .notify = xfrm_replay_notify_esn, .overflow = xfrm_replay_overflow_esn, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3459692..2c341bd 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -72,10 +72,10 @@ static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *nspitable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_state *x; - hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(x, tmp, list, bydst) { unsigned int h; h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, @@ -158,8 +158,8 @@ 
out_unlock: mutex_unlock(&hash_resize_mutex); } -static DEFINE_RWLOCK(xfrm_state_afinfo_lock); -static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; +static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); +static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO]; static DEFINE_SPINLOCK(xfrm_state_gc_lock); @@ -168,58 +168,45 @@ int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 portid); -static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) -{ - struct xfrm_state_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - write_lock_bh(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; - if (unlikely(!afinfo)) - write_unlock_bh(&xfrm_state_afinfo_lock); - return afinfo; -} - -static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) - __releases(xfrm_state_afinfo_lock) -{ - write_unlock_bh(&xfrm_state_afinfo_lock); -} - +static DEFINE_SPINLOCK(xfrm_type_lock); int xfrm_register_type(const struct xfrm_type *type, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); if (likely(typemap[type->proto] == NULL)) typemap[type->proto] = type; else err = -EEXIST; - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_type); int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return 
-EAFNOSUPPORT; typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); if (unlikely(typemap[type->proto] != type)) err = -ENOENT; else typemap[type->proto] = NULL; - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_type); @@ -256,6 +243,7 @@ static void xfrm_put_type(const struct xfrm_type *type) module_put(type->owner); } +static DEFINE_SPINLOCK(xfrm_mode_lock); int xfrm_register_mode(struct xfrm_mode *mode, int family) { struct xfrm_state_afinfo *afinfo; @@ -265,12 +253,13 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) if (unlikely(mode->encap >= XFRM_MODE_MAX)) return -EINVAL; - afinfo = xfrm_state_lock_afinfo(family); + afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; err = -EEXIST; modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); if (modemap[mode->encap]) goto out; @@ -283,7 +272,8 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) err = 0; out: - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_mode); @@ -297,19 +287,21 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family) if (unlikely(mode->encap >= XFRM_MODE_MAX)) return -EINVAL; - afinfo = xfrm_state_lock_afinfo(family); + afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; err = -ENOENT; modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); if (likely(modemap[mode->encap] == mode)) { modemap[mode->encap] = NULL; module_put(mode->afinfo->owner); err = 0; } - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_mode); @@ -376,14 +368,14 @@ static void xfrm_state_gc_task(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; 
- struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); wake_up(&net->xfrm.km_waitq); @@ -585,10 +577,9 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi int i, err = 0; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, @@ -621,10 +612,9 @@ int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; restart: - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -693,13 +683,12 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, { unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { + hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || - xfrm_addr_cmp(&x->id.daddr, daddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family)) continue; if ((mark & x->mark.m) != x->mark.v) @@ -718,13 +707,12 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, { unsigned int h = xfrm_src_hash(net, daddr, saddr, 
family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto || - xfrm_addr_cmp(&x->id.daddr, daddr, family) || - xfrm_addr_cmp(&x->props.saddr, saddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; if ((mark & x->mark.m) != x->mark.v) @@ -806,7 +794,6 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); unsigned int h, h_wildcard; - struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; int error = 0; @@ -818,7 +805,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, spin_lock_bh(&xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -834,7 +821,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, goto found; h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -914,11 +901,10 @@ xfrm_stateonly_find(struct net *net, u32 mark, { unsigned int h; struct xfrm_state *rx = NULL, *x = NULL; - struct hlist_node *entry; spin_lock(&xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, 
net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -980,17 +966,16 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) unsigned short family = xnew->props.family; u32 reqid = xnew->props.reqid; struct xfrm_state *x; - struct hlist_node *entry; unsigned int h; u32 mark = xnew->mark.v & xnew->mark.m; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && - !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && - !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) + xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) && + xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) x->genid++; } } @@ -1012,11 +997,10 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, const xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - struct hlist_node *entry; struct xfrm_state *x; u32 mark = m->v & m->m; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1024,8 +1008,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, x->id.spi != 0 || x->id.proto != proto || (mark & x->mark.m) != x->mark.v || - xfrm_addr_cmp(&x->id.daddr, daddr, family) || - xfrm_addr_cmp(&x->props.saddr, saddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; xfrm_state_hold(x); @@ -1108,7 +1092,7 @@ int xfrm_state_add(struct xfrm_state *x) if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(net, mark, 
x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || - xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { + !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; x1 = NULL; } @@ -1223,21 +1207,20 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) { unsigned int h; struct xfrm_state *x; - struct hlist_node *entry; if (m->reqid) { h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, init_net.xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; if (m->reqid && x->props.reqid != m->reqid) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); return x; @@ -1245,14 +1228,14 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) } else { h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, init_net.xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); return x; @@ -1277,7 +1260,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); /* add state */ - if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) { + if (xfrm_addr_equal(&x->id.daddr, 
&m->new_daddr, m->new_family)) { /* a care is needed when the destination address of the state is to be updated as it is a part of triplet */ xfrm_state_insert(xc); @@ -1370,9 +1353,6 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (!x->curlft.use_time) x->curlft.use_time = get_seconds(); - if (x->km.state != XFRM_STATE_VALID) - return -EINVAL; - if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; @@ -1477,10 +1457,9 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s int i; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && x->km.state == XFRM_STATE_ACQ) { @@ -1648,27 +1627,26 @@ static void xfrm_replay_timer_handler(unsigned long data) } static LIST_HEAD(xfrm_km_list); -static DEFINE_RWLOCK(xfrm_km_lock); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify_policy) km->notify_policy(xp, dir, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } void km_state_notify(struct xfrm_state *x, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify) km->notify(x, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } EXPORT_SYMBOL(km_policy_notify); @@ -1698,13 +1676,13 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) int err = -EINVAL, acqret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, 
&xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { acqret = km->acquire(x, t, pol); if (!acqret) err = acqret; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_query); @@ -1714,14 +1692,14 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) int err = -EINVAL; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->new_mapping) err = km->new_mapping(x, ipaddr, sport); if (!err) break; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_new_mapping); @@ -1750,15 +1728,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, int ret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->migrate) { ret = km->migrate(sel, dir, type, m, num_migrate, k); if (!ret) err = ret; } } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_migrate); @@ -1770,15 +1748,15 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address int ret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->report) { ret = km->report(net, proto, sel, addr); if (!ret) err = ret; } } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_report); @@ -1802,14 +1780,14 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen goto out; err = -EINVAL; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; } - 
read_unlock(&xfrm_km_lock); + rcu_read_unlock(); if (err >= 0) { xfrm_sk_policy_insert(sk, err, pol); @@ -1823,20 +1801,23 @@ out: } EXPORT_SYMBOL(xfrm_user_policy); +static DEFINE_SPINLOCK(xfrm_km_lock); + int xfrm_register_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_add_tail(&km->list, &xfrm_km_list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_add_tail_rcu(&km->list, &xfrm_km_list); + spin_unlock_bh(&xfrm_km_lock); return 0; } EXPORT_SYMBOL(xfrm_register_km); int xfrm_unregister_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_del(&km->list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_del_rcu(&km->list); + spin_unlock_bh(&xfrm_km_lock); + synchronize_rcu(); return 0; } EXPORT_SYMBOL(xfrm_unregister_km); @@ -1848,12 +1829,12 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else - xfrm_state_afinfo[afinfo->family] = afinfo; - write_unlock_bh(&xfrm_state_afinfo_lock); + rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo); + spin_unlock_bh(&xfrm_state_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_state_register_afinfo); @@ -1865,14 +1846,15 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; else - xfrm_state_afinfo[afinfo->family] = NULL; + RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL); } - write_unlock_bh(&xfrm_state_afinfo_lock); + spin_unlock_bh(&xfrm_state_afinfo_lock); + synchronize_rcu(); 
return err; } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); @@ -1882,17 +1864,16 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) return NULL; - read_lock(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_state_afinfo[family]); if (unlikely(!afinfo)) - read_unlock(&xfrm_state_afinfo_lock); + rcu_read_unlock(); return afinfo; } static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) - __releases(xfrm_state_afinfo_lock) { - read_unlock(&xfrm_state_afinfo_lock); + rcu_read_unlock(); } /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index eb872b2..fbd9e6c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1112,7 +1112,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, mark = xfrm_mark_get(attrs, &m); if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq); - if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { + if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; } |