diff options
Diffstat (limited to 'net')
108 files changed, 1577 insertions, 920 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index bde9f3d..e3e5bf4 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -60,7 +60,7 @@ static const match_table_t tokens = { inline int p9_is_proto_dotl(struct p9_client *clnt) { - return (clnt->proto_version == p9_proto_2010L); + return (clnt->proto_version == p9_proto_2000L); } EXPORT_SYMBOL(p9_is_proto_dotl); @@ -80,9 +80,9 @@ static unsigned char get_protocol_version(const substring_t *name) } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { version = p9_proto_2000u; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); - } else if (!strncmp("9p2010.L", name->from, name->to-name->from)) { - version = p9_proto_2010L; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2010.L\n"); + } else if (!strncmp("9p2000.L", name->from, name->to-name->from)) { + version = p9_proto_2000L; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); } else { P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ", name->from); @@ -672,9 +672,9 @@ int p9_client_version(struct p9_client *c) c->msize, c->proto_version); switch (c->proto_version) { - case p9_proto_2010L: + case p9_proto_2000L: req = p9_client_rpc(c, P9_TVERSION, "ds", - c->msize, "9P2010.L"); + c->msize, "9P2000.L"); break; case p9_proto_2000u: req = p9_client_rpc(c, P9_TVERSION, "ds", @@ -700,8 +700,8 @@ int p9_client_version(struct p9_client *c) } P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); - if (!strncmp(version, "9P2010.L", 8)) - c->proto_version = p9_proto_2010L; + if (!strncmp(version, "9P2000.L", 8)) + c->proto_version = p9_proto_2000L; else if (!strncmp(version, "9P2000.u", 8)) c->proto_version = p9_proto_2000u; else if (!strncmp(version, "9P2000", 6)) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 0aaed48..afde1a8 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -78,6 +78,12 @@ struct virtio_chan { /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; + int tag_len; + /* + * tag name to identify a mount Non-null terminated + */ + char *tag; + struct list_head chan_list; }; @@ -214,6 +220,20 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) return 0; } +static ssize_t p9_mount_tag_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct virtio_chan *chan; + struct virtio_device *vdev; + + vdev = dev_to_virtio(dev); + chan = vdev->priv; + + return snprintf(buf, chan->tag_len + 1, "%s", chan->tag); +} + +static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL); + /** * p9_virtio_probe - probe for existence of 9P virtio channels * @vdev: virtio device to probe @@ -224,6 +244,8 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) static int p9_virtio_probe(struct virtio_device *vdev) { + __u16 tag_len; + char *tag; int err; struct virtio_chan *chan; @@ -248,6 +270,28 @@ static int p9_virtio_probe(struct virtio_device *vdev) sg_init_table(chan->sg, VIRTQUEUE_NUM); chan->inuse = false; + if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { + vdev->config->get(vdev, + offsetof(struct virtio_9p_config, tag_len), + &tag_len, sizeof(tag_len)); + } else { + err = -EINVAL; + goto out_free_vq; + } + tag = kmalloc(tag_len, GFP_KERNEL); + if (!tag) { + err = -ENOMEM; + goto out_free_vq; + } + vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), + tag, tag_len); + chan->tag = tag; + chan->tag_len = tag_len; + err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + if (err) { + kfree(tag); + goto out_free_vq; + } mutex_lock(&virtio_9p_lock); list_add_tail(&chan->chan_list, &virtio_chan_list); mutex_unlock(&virtio_9p_lock); @@ -284,7 +328,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_lock(&virtio_9p_lock); list_for_each_entry(chan, &virtio_chan_list, chan_list) { - if (!strcmp(devname, dev_name(&chan->vdev->dev))) { + if (!strncmp(devname, chan->tag, chan->tag_len)) { if (!chan->inuse) { chan->inuse = true; found = 1; @@ -323,6 +367,8 @@ static void p9_virtio_remove(struct virtio_device *vdev) mutex_lock(&virtio_9p_lock); list_del(&chan->chan_list); mutex_unlock(&virtio_9p_lock); + sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + kfree(chan->tag); kfree(chan); } @@ -332,13 +378,19 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +static unsigned int features[] = { + VIRTIO_9P_MOUNT_TAG, +}; + /* The standard "struct lguest_driver": */ static struct virtio_driver p9_virtio_drv = { - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = p9_virtio_probe, - .remove = p9_virtio_remove, + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = p9_virtio_probe, + .remove = p9_virtio_remove, }; static struct p9_trans_module p9_virtio_trans = { diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index b6234b7..326ab45 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -87,7 +87,7 @@ static void bnep_net_set_mc_list(struct net_device *dev) memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN); r->len = htons(ETH_ALEN * 2); } else { - struct dev_mc_list *dmi = dev->mc_list; + struct dev_mc_list *dmi; int i, len = skb->len; if (dev->flags & IFF_BROADCAST) { @@ -97,12 +97,12 @@ static void bnep_net_set_mc_list(struct net_device *dev) /* FIXME: We should group addresses here. */ - for (i = 0; - i < netdev_mc_count(dev) && i < BNEP_MAX_MULTICAST_FILTERS; - i++) { + i = 0; + netdev_for_each_mc_addr(dmi, dev) { + if (i == BNEP_MAX_MULTICAST_FILTERS) + break; memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); - dmi = dmi->next; } r->len = htons(skb->len - len); } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 1a79a6c..cafb55b 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -3,6 +3,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/debugfs.h> +#include <linux/seq_file.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -405,20 +406,11 @@ static struct device_type bt_host = { .release = bt_host_release, }; -static int inquiry_cache_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - -static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +static int inquiry_cache_show(struct seq_file *f, void *p) { - struct hci_dev *hdev = file->private_data; + struct hci_dev *hdev = f->private; struct inquiry_cache *cache = &hdev->inq_cache; struct inquiry_entry *e; - char buf[4096]; - int n = 0; hci_dev_lock_bh(hdev); @@ -426,23 +418,30 @@ static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, struct inquiry_data *data = &e->data; bdaddr_t bdaddr; baswap(&bdaddr, &data->bdaddr); - n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - batostr(&bdaddr), - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); + seq_printf(f, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + batostr(&bdaddr), + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); } hci_dev_unlock_bh(hdev); - return simple_read_from_buffer(userbuf, count, ppos, buf, n); + return 0; +} + +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + return single_open(file, inquiry_cache_show, inode->i_private); } static const struct file_operations inquiry_cache_fops = { - .open = inquiry_cache_open, - .read = inquiry_cache_read, + .open = inquiry_cache_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; int hci_register_sysfs(struct hci_dev *hdev) diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 400efa2..4db7ae2 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3937,7 +3937,9 @@ drop: return 0; } -static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) +static ssize_t l2cap_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 89f4a59..db8a68e 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2098,7 +2098,9 @@ static struct hci_cb rfcomm_cb = { .security_cfm = rfcomm_security_cfm }; -static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) +static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct rfcomm_session *s; struct list_head *pp, *p; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 4b5968d..ca87d6a 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1061,7 +1061,9 @@ done: return result; } -static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf) +static ssize_t rfcomm_sock_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index dd8f6ec..f93b939 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -953,7 +953,9 @@ drop: return 0; } -static ssize_t sco_sysfs_show(struct class *dev, char *buf) +static ssize_t sco_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 19a6b96..d115d5c 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -35,6 +35,7 @@ config BRIDGE config BRIDGE_IGMP_SNOOPING bool "IGMP snooping" depends on BRIDGE + depends on INET default y ---help--- If you say Y here, then the Ethernet bridge will be able selectively diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index eb7062d..5b8a6e7 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; + struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); - BR_INPUT_SKB_CB(skb)->brdev = dev; + brstats->tx_packets++; + brstats->tx_bytes += skb->len; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); @@ -40,7 +41,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) goto out; mdst = br_mdb_get(br, skb); - if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only) + if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb); @@ -81,6 +82,31 @@ static int br_dev_stop(struct net_device *dev) return 0; } +static struct net_device_stats *br_get_stats(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct br_cpu_netstats sum = { 0 }; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + const struct br_cpu_netstats *bstats + = per_cpu_ptr(br->stats, cpu); + + sum.tx_bytes += bstats->tx_bytes; + sum.tx_packets += bstats->tx_packets; + sum.rx_bytes += bstats->rx_bytes; + sum.rx_packets += bstats->rx_packets; + } + + stats->tx_bytes = sum.tx_bytes; + stats->tx_packets = sum.tx_packets; + stats->rx_bytes = sum.rx_bytes; + stats->rx_packets = sum.rx_packets; + + return stats; +} + static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); @@ -180,19 +206,28 @@ static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, .ndo_start_xmit = br_dev_xmit, + .ndo_get_stats = br_get_stats, .ndo_set_mac_address = br_set_mac_address, .ndo_set_multicast_list = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, }; +static void br_dev_free(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + free_percpu(br->stats); + free_netdev(dev); +} + void br_dev_setup(struct net_device *dev) { random_ether_addr(dev->dev_addr); ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->destructor = free_netdev; + dev->destructor = br_dev_free; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index d61e6f7..8dbec83 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -19,6 +19,11 @@ #include <linux/netfilter_bridge.h> #include "br_private.h" +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)); + /* Don't forward packets to originating port or forwarding diasabled */ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) @@ -94,17 +99,22 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) } /* called with rcu_read_lock */ -void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) +void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) { if (should_deliver(to, skb)) { - __br_forward(to, skb); + if (skb0) + deliver_clone(to, skb, __br_forward); + else + __br_forward(to, skb); return; } - kfree_skb(skb); + if (!skb0) + kfree_skb(skb); } -static int deliver_clone(struct net_bridge_port *prev, struct sk_buff *skb, +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b6a3872..b7cdd2e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name) br = netdev_priv(dev); br->dev = dev; + br->stats = alloc_percpu(struct br_cpu_netstats); + if (!br->stats) { + free_netdev(dev); + return NULL; + } + spin_lock_init(&br->lock); INIT_LIST_HEAD(&br->port_list); spin_lock_init(&br->hash_lock); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 53b3985..333dfb7 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -23,9 +23,11 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(brdev); + struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); - brdev->stats.rx_packets++; - brdev->stats.rx_bytes += skb->len; + brstats->rx_packets++; + brstats->rx_bytes += skb->len; indev = skb->dev; skb->dev = brdev; @@ -70,7 +72,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb); - if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only) { + if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && !hlist_unhashed(&mdst->mglist)) || br_multicast_is_router(br)) skb2 = skb; @@ -90,7 +92,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (skb) { if (dst) - br_forward(dst->dst, skb); + br_forward(dst->dst, skb, skb2); else br_flood_forward(br, skb, skb2); } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2559fb5..6980625 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -38,7 +38,7 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( struct net_bridge_mdb_entry *mp; struct hlist_node *p; - hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { if (dst == mp->addr) return mp; } @@ -49,22 +49,23 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( static struct net_bridge_mdb_entry *br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, __be32 dst) { + if (!mdb) + return NULL; + return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); } struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { - struct net_bridge_mdb_htable *mdb = br->mdb; - - if (!mdb || br->multicast_disabled) + if (br->multicast_disabled) return NULL; switch (skb->protocol) { case htons(ETH_P_IP): if (BR_INPUT_SKB_CB(skb)->igmp) break; - return br_mdb_ip_get(mdb, ip_hdr(skb)->daddr); + return br_mdb_ip_get(br->mdb, ip_hdr(skb)->daddr); } return NULL; @@ -627,8 +628,8 @@ static void br_multicast_port_query_expired(unsigned long data) struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); - if (port && (port->state == BR_STATE_DISABLED || - port->state == BR_STATE_BLOCKING)) + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) goto out; if (port->multicast_startup_queries_sent < @@ -823,6 +824,7 @@ static int br_multicast_query(struct net_bridge *br, unsigned long max_delay; unsigned long now = jiffies; __be32 group; + int err = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -841,15 +843,17 @@ static int br_multicast_query(struct net_bridge *br, group = 0; } } else { - if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) - return -EINVAL; + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { + err = -EINVAL; + goto out; + } ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) - return 0; + goto out; - max_delay = ih3->code ? 1 : - IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE); + max_delay = ih3->code ? + IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } if (!group) @@ -876,7 +880,7 @@ static int br_multicast_query(struct net_bridge *br, out: spin_unlock(&br->multicast_lock); - return 0; + return err; } static void br_multicast_leave_group(struct net_bridge *br, @@ -987,7 +991,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = pskb_trim_rcsum(skb2, len); if (err) - return err; + goto err_out; } len -= ip_hdrlen(skb2); @@ -1009,7 +1013,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case CHECKSUM_NONE: skb2->csum = 0; if (skb_checksum_complete(skb2)) - return -EINVAL; + goto out; } err = 0; @@ -1036,6 +1040,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, out: __skb_push(skb2, offset); +err_out: if (skb2 != skb) kfree_skb(skb2); return err; @@ -1135,7 +1140,7 @@ void br_multicast_stop(struct net_bridge *br) if (mdb->old) { spin_unlock_bh(&br->multicast_lock); - synchronize_rcu_bh(); + rcu_barrier_bh(); spin_lock_bh(&br->multicast_lock); WARN_ON(mdb->old); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1cf2cef..791d4ab 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -135,6 +135,14 @@ struct net_bridge spinlock_t lock; struct list_head port_list; struct net_device *dev; + + struct br_cpu_netstats __percpu { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; + } *stats; + spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; unsigned long feature_mask; @@ -206,12 +214,20 @@ struct net_bridge struct br_input_skb_cb { struct net_device *brdev; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING int igmp; int mrouters_only; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (BR_INPUT_SKB_CB(__skb)->mrouters_only) +#else +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0) +#endif + extern struct notifier_block br_device_notifier; extern const u8 br_group_address[ETH_ALEN]; @@ -252,7 +268,7 @@ extern void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, - struct sk_buff *skb); + struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, @@ -423,7 +439,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ -extern struct sysfs_ops brport_sysfs_ops; +extern const struct sysfs_ops brport_sysfs_ops; extern int br_sysfs_addif(struct net_bridge_port *p); /* br_sysfs_br.c */ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 696596c..0b99164 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -238,7 +238,7 @@ static ssize_t brport_store(struct kobject * kobj, return ret; } -struct sysfs_ops brport_sysfs_ops = { +const struct sysfs_ops brport_sysfs_ops = { .show = brport_show, .store = brport_store, }; diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 4644cc9..63e3888 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -4,7 +4,7 @@ * Authors: * Manohar Castelino <manohar.r.castelino@intel.com> * Kuo-Lang Tseng <kuo-lang.tseng@intel.com> - * Jan Engelhardt <jengelh@computergmbh.de> + * Jan Engelhardt <jengelh@medozas.de> * * Summary: * This is just a modification of the IPv4 code written by @@ -129,4 +129,5 @@ static void __exit ebt_ip6_fini(void) module_init(ebt_ip6_init); module_exit(ebt_ip6_fini); MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match"); +MODULE_AUTHOR("Kuo-Lang Tseng <kuo-lang.tseng@intel.com>"); MODULE_LICENSE("GPL"); diff --git a/net/core/dev.c b/net/core/dev.c index bcc490c..17b1686 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1931,7 +1931,7 @@ out_kfree_skb: return rc; } -static u32 skb_tx_hashrnd; +static u32 hashrnd __read_mostly; u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { @@ -1949,7 +1949,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) else hash = skb->protocol; - hash = jhash_1word(hash, skb_tx_hashrnd); + hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } @@ -1959,10 +1959,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { if (net_ratelimit()) { - WARN(1, "%s selects TX queue %d, but " + netdev_warn(dev, "selects TX queue %d, but " "real number of TX queues is %d\n", - dev->name, queue_index, - dev->real_num_tx_queues); + queue_index, dev->real_num_tx_queues); } return 0; } @@ -2175,6 +2174,172 @@ int weight_p __read_mostly = 64; /* old backlog weight */ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; +/* + * get_rps_cpu is called from netif_receive_skb and returns the target + * CPU from the RPS map of the receiving queue for a given skb. + */ +static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb) +{ + struct ipv6hdr *ip6; + struct iphdr *ip; + struct netdev_rx_queue *rxqueue; + struct rps_map *map; + int cpu = -1; + u8 ip_proto; + u32 addr1, addr2, ports, ihl; + + rcu_read_lock(); + + if (skb_rx_queue_recorded(skb)) { + u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->num_rx_queues)) { + if (net_ratelimit()) { + netdev_warn(dev, "received packet on queue " + "%u, but number of RX queues is %u\n", + index, dev->num_rx_queues); + } + goto done; + } + rxqueue = dev->_rx + index; + } else + rxqueue = dev->_rx; + + if (!rxqueue->rps_map) + goto done; + + if (skb->rxhash) + goto got_hash; /* Skip hash computation on packet header */ + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + if (!pskb_may_pull(skb, sizeof(*ip))) + goto done; + + ip = (struct iphdr *) skb->data; + ip_proto = ip->protocol; + addr1 = ip->saddr; + addr2 = ip->daddr; + ihl = ip->ihl; + break; + case __constant_htons(ETH_P_IPV6): + if (!pskb_may_pull(skb, sizeof(*ip6))) + goto done; + + ip6 = (struct ipv6hdr *) skb->data; + ip_proto = ip6->nexthdr; + addr1 = ip6->saddr.s6_addr32[3]; + addr2 = ip6->daddr.s6_addr32[3]; + ihl = (40 >> 2); + break; + default: + goto done; + } + ports = 0; + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + if (pskb_may_pull(skb, (ihl * 4) + 4)) + ports = *((u32 *) (skb->data + (ihl * 4))); + break; + + default: + break; + } + + skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd); + if (!skb->rxhash) + skb->rxhash = 1; + +got_hash: + map = rcu_dereference(rxqueue->rps_map); + if (map) { + u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; + + if (cpu_online(tcpu)) { + cpu = tcpu; + goto done; + } + } + +done: + rcu_read_unlock(); + return cpu; +} + +/* + * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled + * to be sent to kick remote softirq processing. There are two masks since + * the sending of IPIs must be done with interrupts enabled. The select field + * indicates the current mask that enqueue_backlog uses to schedule IPIs. + * select is flipped before net_rps_action is called while still under lock, + * net_rps_action then uses the non-selected mask to send the IPIs and clears + * it without conflicting with enqueue_backlog operation. + */ +struct rps_remote_softirq_cpus { + cpumask_t mask[2]; + int select; +}; +static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus); + +/* Called from hardirq (IPI) context */ +static void trigger_softirq(void *data) +{ + struct softnet_data *queue = data; + __napi_schedule(&queue->backlog); + __get_cpu_var(netdev_rx_stat).received_rps++; +} + +/* + * enqueue_to_backlog is called to queue an skb to a per CPU backlog + * queue (may be a remote CPU queue). + */ +static int enqueue_to_backlog(struct sk_buff *skb, int cpu) +{ + struct softnet_data *queue; + unsigned long flags; + + queue = &per_cpu(softnet_data, cpu); + + local_irq_save(flags); + __get_cpu_var(netdev_rx_stat).total++; + + spin_lock(&queue->input_pkt_queue.lock); + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { + if (queue->input_pkt_queue.qlen) { +enqueue: + __skb_queue_tail(&queue->input_pkt_queue, skb); + spin_unlock_irqrestore(&queue->input_pkt_queue.lock, + flags); + return NET_RX_SUCCESS; + } + + /* Schedule NAPI for backlog device */ + if (napi_schedule_prep(&queue->backlog)) { + if (cpu != smp_processor_id()) { + struct rps_remote_softirq_cpus *rcpus = + &__get_cpu_var(rps_remote_softirq_cpus); + + cpu_set(cpu, rcpus->mask[rcpus->select]); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + } else + __napi_schedule(&queue->backlog); + } + goto enqueue; + } + + spin_unlock(&queue->input_pkt_queue.lock); + + __get_cpu_var(netdev_rx_stat).dropped++; + local_irq_restore(flags); + + kfree_skb(skb); + return NET_RX_DROP; +} /** * netif_rx - post buffer to the network code @@ -2193,8 +2358,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; int netif_rx(struct sk_buff *skb) { - struct softnet_data *queue; - unsigned long flags; + int cpu; /* if netpoll wants it, pretend we never saw it */ if (netpoll_rx(skb)) @@ -2203,31 +2367,11 @@ int netif_rx(struct sk_buff *skb) if (!skb->tstamp.tv64) net_timestamp(skb); - /* - * The code is rearranged so that the path is the most - * short when CPU is congested, but is still operating. - */ - local_irq_save(flags); - queue = &__get_cpu_var(softnet_data); - - __get_cpu_var(netdev_rx_stat).total++; - if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { - if (queue->input_pkt_queue.qlen) { -enqueue: - __skb_queue_tail(&queue->input_pkt_queue, skb); - local_irq_restore(flags); - return NET_RX_SUCCESS; - } - - napi_schedule(&queue->backlog); - goto enqueue; - } - - __get_cpu_var(netdev_rx_stat).dropped++; - local_irq_restore(flags); + cpu = get_rps_cpu(skb->dev, skb); + if (cpu < 0) + cpu = smp_processor_id(); - kfree_skb(skb); - return NET_RX_DROP; + return enqueue_to_backlog(skb, cpu); } EXPORT_SYMBOL(netif_rx); @@ -2464,22 +2608,7 @@ void netif_nit_deliver(struct sk_buff *skb) rcu_read_unlock(); } -/** - * netif_receive_skb - process receive buffer from network - * @skb: buffer to process - * - * netif_receive_skb() is the main receive data processing function. - * It always succeeds. The buffer may be dropped during processing - * for congestion control or by the protocol layers. - * - * This function may only be called from softirq context and interrupts - * should be enabled. - * - * Return values (usually ignored): - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -int netif_receive_skb(struct sk_buff *skb) +int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; @@ -2588,6 +2717,33 @@ out: rcu_read_unlock(); return ret; } + +/** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process + * + * netif_receive_skb() is the main receive data processing function. + * It always succeeds. The buffer may be dropped during processing + * for congestion control or by the protocol layers. + * + * This function may only be called from softirq context and interrupts + * should be enabled. + * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb(struct sk_buff *skb) +{ + int cpu; + + cpu = get_rps_cpu(skb->dev, skb); + + if (cpu < 0) + return __netif_receive_skb(skb); + else + return enqueue_to_backlog(skb, cpu); +} EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending */ @@ -2914,16 +3070,16 @@ static int process_backlog(struct napi_struct *napi, int quota) do { struct sk_buff *skb; - local_irq_disable(); + spin_lock_irq(&queue->input_pkt_queue.lock); skb = __skb_dequeue(&queue->input_pkt_queue); if (!skb) { __napi_complete(napi); - local_irq_enable(); + spin_unlock_irq(&queue->input_pkt_queue.lock); break; } - local_irq_enable(); + spin_unlock_irq(&queue->input_pkt_queue.lock); - netif_receive_skb(skb); + __netif_receive_skb(skb); } while (++work < quota && jiffies == start_time); return work; @@ -3012,6 +3168,22 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); +/* + * net_rps_action sends any pending IPI's for rps. This is only called from + * softirq and interrupts must be enabled. + */ +static void net_rps_action(cpumask_t *mask) +{ + int cpu; + + /* Send pending IPI's to kick RPS processing on remote cpus. */ + for_each_cpu_mask_nr(cpu, *mask) { + struct softnet_data *queue = &per_cpu(softnet_data, cpu); + if (cpu_online(cpu)) + __smp_call_function_single(cpu, &queue->csd, 0); + } + cpus_clear(*mask); +} static void net_rx_action(struct softirq_action *h) { @@ -3019,6 +3191,8 @@ static void net_rx_action(struct softirq_action *h) unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; + int select; + struct rps_remote_softirq_cpus *rcpus; local_irq_disable(); @@ -3081,8 +3255,14 @@ static void net_rx_action(struct softirq_action *h) netpoll_poll_unlock(have); } out: + rcpus = &__get_cpu_var(rps_remote_softirq_cpus); + select = rcpus->select; + rcpus->select ^= 1; + local_irq_enable(); + net_rps_action(&rcpus->mask[select]); + #ifdef CONFIG_NET_DMA /* * There may not be any more sk_buffs coming right now, so push @@ -3327,10 +3507,10 @@ static int softnet_seq_show(struct seq_file *seq, void *v) { struct netif_rx_stats *s = v; - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", s->total, s->dropped, s->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - s->cpu_collision); + s->cpu_collision, s->received_rps); return 0; } @@ -5067,6 +5247,23 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; + if (!dev->num_rx_queues) { + /* + * Allocate a single RX queue if driver never called + * alloc_netdev_mq + */ + + dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); + if (!dev->_rx) { + ret = -ENOMEM; + goto out; + } + + dev->_rx->first = dev->_rx; + atomic_set(&dev->_rx->count, 1); + dev->num_rx_queues = 1; + } + /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -5424,9 +5621,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, void (*setup)(struct net_device *), unsigned int queue_count) { struct netdev_queue *tx; + struct netdev_rx_queue *rx; struct net_device *dev; size_t alloc_size; struct net_device *p; + int i; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -5452,11 +5651,27 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, goto free_p; } + rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); + if (!rx) { + printk(KERN_ERR "alloc_netdev: Unable to allocate " + "rx queues.\n"); + goto free_tx; + } + + atomic_set(&rx->count, queue_count); + + /* + * Set a pointer to first element in the array which holds the + * reference count. + */ + for (i = 0; i < queue_count; i++) + rx[i].first = rx; + dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; if (dev_addr_init(dev)) - goto free_tx; + goto free_rx; dev_unicast_init(dev); @@ -5466,6 +5681,9 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; + dev->_rx = rx; + dev->num_rx_queues = queue_count; + dev->gso_max_size = GSO_MAX_SIZE; netdev_init_queues(dev); @@ -5480,9 +5698,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, strcpy(dev->name, name); return dev; +free_rx: + kfree(rx); free_tx: kfree(tx); - free_p: kfree(p); return NULL; @@ -5985,6 +6204,10 @@ static int __init net_dev_init(void) queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); + queue->csd.func = trigger_softirq; + queue->csd.info = queue; + queue->csd.flags = 0; + queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; queue->backlog.gro_list = NULL; @@ -6023,7 +6246,7 @@ subsys_initcall(net_dev_init); static int __init initialize_hashrnd(void) { - get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); + get_random_bytes(&hashrnd, sizeof(hashrnd)); return 0; } diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index fd91569..3dc295b 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -97,8 +97,9 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) netif_addr_lock_bh(dev); if (alen != dev->addr_len) - return -EINVAL; - err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); + err = -EINVAL; + else + err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0f2f821..f4cb6b6 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/ethtool.h> #include <linux/netdevice.h> +#include <linux/bitops.h> #include <asm/uaccess.h> /* @@ -199,10 +200,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_settings(dev, &cmd); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) { struct ethtool_drvinfo info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -214,6 +212,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); + /* + * this method of obtaining string set info is deprecated; + * Use ETHTOOL_GSSET_INFO instead. + */ if (ops->get_sset_count) { int rc; @@ -237,10 +239,67 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use return 0; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_sset_info info; + const struct ethtool_ops *ops = dev->ethtool_ops; + u64 sset_mask; + int i, idx = 0, n_bits = 0, ret, rc; + u32 *info_buf = NULL; + + if (!ops->get_sset_count) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + /* store copy of mask, because we zero struct later on */ + sset_mask = info.sset_mask; + if (!sset_mask) + return 0; + + /* calculate size of return buffer */ + n_bits = hweight64(sset_mask); + + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GSSET_INFO; + + info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER); + if (!info_buf) + return -ENOMEM; + + /* + * fill return buffer based on input bitmask and successful + * get_sset_count return + */ + for (i = 0; i < 64; i++) { + if (!(sset_mask & (1ULL << i))) + continue; + + rc = ops->get_sset_count(dev, i); + if (rc >= 0) { + info.sset_mask |= (1ULL << i); + info_buf[idx++] = rc; + } + } + + ret = -EFAULT; + if (copy_to_user(useraddr, &info, sizeof(info))) + goto out; + + useraddr += offsetof(struct ethtool_sset_info, data); + if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) + goto out; + + ret = 0; + +out: + kfree(info_buf); + return ret; +} + +static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc cmd; @@ -253,10 +312,7 @@ static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *usera return dev->ethtool_ops->set_rxnfc(dev, &cmd); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -328,10 +384,7 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, list->count++; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -799,10 +852,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) return ret; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; @@ -816,10 +866,7 @@ static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *us return 0; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce; @@ -1229,10 +1276,7 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, return actor(dev, edata.data); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_flash_device(struct net_device *dev, char __user *useraddr) +static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) { struct ethtool_flash efl; @@ -1471,6 +1515,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXNTUPLE: rc = ethtool_get_rx_ntuple(dev, useraddr); break; + case ETHTOOL_GSSET_INFO: + rc = ethtool_get_sset_info(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a24377..2ff3489 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -108,7 +108,7 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) struct fib_rules_ops *ops; int err; - ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); + ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); @@ -123,7 +123,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) return ops; } - EXPORT_SYMBOL_GPL(fib_rules_register); void fib_rules_cleanup_ops(struct fib_rules_ops *ops) @@ -157,7 +156,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops) call_rcu(&ops->rcu, fib_rules_put_rcu); } - EXPORT_SYMBOL_GPL(fib_rules_unregister); static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, @@ -220,7 +218,6 @@ out: return err; } - EXPORT_SYMBOL_GPL(fib_rules_lookup); static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, @@ -613,7 +610,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) break; cb->args[1] = 0; - skip: +skip: idx++; } rcu_read_unlock(); @@ -685,7 +682,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct fib_rules_ops *ops; ASSERT_RTNL(); - rcu_read_lock(); switch (event) { case NETDEV_REGISTER: @@ -699,8 +695,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, break; } - rcu_read_unlock(); - return NOTIFY_DONE; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d102f6d..6cee643 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -771,6 +771,8 @@ static __inline__ int neigh_max_probes(struct neighbour *n) } static void neigh_invalidate(struct neighbour *neigh) + __releases(neigh->lock) + __acquires(neigh->lock) { struct sk_buff *skb; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 099c753..7a46343 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -466,6 +466,216 @@ static struct attribute_group wireless_group = { }; #endif +/* + * RX queue sysfs structures and functions. + */ +struct rx_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, char *buf); + ssize_t (*store)(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, const char *buf, size_t len); +}; +#define to_rx_queue_attr(_attr) container_of(_attr, \ + struct rx_queue_attribute, attr) + +#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) + +static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); + struct netdev_rx_queue *queue = to_rx_queue(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(queue, attribute, buf); +} + +static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); + struct netdev_rx_queue *queue = to_rx_queue(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(queue, attribute, buf, count); +} + +static struct sysfs_ops rx_queue_sysfs_ops = { + .show = rx_queue_attr_show, + .store = rx_queue_attr_store, +}; + +static ssize_t show_rps_map(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attribute, char *buf) +{ + struct rps_map *map; + cpumask_var_t mask; + size_t len = 0; + int i; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + rcu_read_lock(); + map = rcu_dereference(queue->rps_map); + if (map) + for (i = 0; i < map->len; i++) + cpumask_set_cpu(map->cpus[i], mask); + + len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); + if (PAGE_SIZE - len < 3) { + rcu_read_unlock(); + free_cpumask_var(mask); + return -EINVAL; + } + rcu_read_unlock(); + + free_cpumask_var(mask); + len += sprintf(buf + len, "\n"); + return len; +} + +static void rps_map_release(struct rcu_head *rcu) +{ + struct rps_map *map = container_of(rcu, struct rps_map, rcu); + + kfree(map); +} + +ssize_t store_rps_map(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct rps_map *old_map, *map; + cpumask_var_t mask; + int err, cpu, i; + static DEFINE_SPINLOCK(rps_map_lock); + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); + if (err) { + free_cpumask_var(mask); + return err; + } + + map = kzalloc(max_t(unsigned, + RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), + GFP_KERNEL); + if (!map) { + free_cpumask_var(mask); + return -ENOMEM; + } + + i = 0; + for_each_cpu_and(cpu, mask, cpu_online_mask) + map->cpus[i++] = cpu; + + if (i) + map->len = i; + else { + kfree(map); + map = NULL; + } + + spin_lock(&rps_map_lock); + old_map = queue->rps_map; + rcu_assign_pointer(queue->rps_map, map); + spin_unlock(&rps_map_lock); + + if (old_map) + call_rcu(&old_map->rcu, rps_map_release); + + free_cpumask_var(mask); + return len; +} + +static struct rx_queue_attribute rps_cpus_attribute = + __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); + +static struct attribute *rx_queue_default_attrs[] = { + &rps_cpus_attribute.attr, + NULL +}; + +static void rx_queue_release(struct kobject *kobj) +{ + struct netdev_rx_queue *queue = to_rx_queue(kobj); + struct rps_map *map = queue->rps_map; + struct netdev_rx_queue *first = queue->first; + + if (map) + call_rcu(&map->rcu, rps_map_release); + + if (atomic_dec_and_test(&first->count)) + kfree(first); +} + +static struct kobj_type rx_queue_ktype = { + .sysfs_ops = &rx_queue_sysfs_ops, + .release = rx_queue_release, + .default_attrs = rx_queue_default_attrs, +}; + +static int rx_queue_add_kobject(struct net_device *net, int index) +{ + struct netdev_rx_queue *queue = net->_rx + index; + struct kobject *kobj = &queue->kobj; + int error = 0; + + kobj->kset = net->queues_kset; + error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, + "rx-%u", index); + if (error) { + kobject_put(kobj); + return error; + } + + kobject_uevent(kobj, KOBJ_ADD); + + return error; +} + +static int rx_queue_register_kobjects(struct net_device *net) +{ + int i; + int error = 0; + + net->queues_kset = kset_create_and_add("queues", + NULL, &net->dev.kobj); + if (!net->queues_kset) + return -ENOMEM; + for (i = 0; i < net->num_rx_queues; i++) { + error = rx_queue_add_kobject(net, i); + if (error) + break; + } + + if (error) + while (--i >= 0) + kobject_put(&net->_rx[i].kobj); + + return error; +} + +static void rx_queue_remove_kobjects(struct net_device *net) +{ + int i; + + for (i = 0; i < net->num_rx_queues; i++) + kobject_put(&net->_rx[i].kobj); + kset_unregister(net->queues_kset); +} + #endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG @@ -529,6 +739,8 @@ void netdev_unregister_kobject(struct net_device * net) if (!net_eq(dev_net(net), &init_net)) return; + rx_queue_remove_kobjects(net); + device_del(dev); } @@ -537,6 +749,7 @@ int netdev_register_kobject(struct net_device *net) { struct device *dev = &(net->dev); const struct attribute_group **groups = net->sysfs_groups; + int error = 0; dev->class = &net_class; dev->platform_data = net; @@ -563,7 +776,17 @@ int netdev_register_kobject(struct net_device *net) if (!net_eq(dev_net(net), &init_net)) return 0; - return device_add(dev); + error = device_add(dev); + if (error) + return error; + + error = rx_queue_register_kobjects(net); + if (error) { + device_del(dev); + return error; + } + + return error; } int netdev_class_create_file(struct class_attribute *class_attr) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 7aa6972..d4ec38f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -735,7 +735,7 @@ int netpoll_setup(struct netpoll *np) npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; - goto release; + goto put; } npinfo->rx_flags = 0; @@ -845,7 +845,7 @@ int netpoll_setup(struct netpoll *np) kfree(npinfo); } - +put: dev_put(ndev); return err; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4568120..e1121f0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -600,7 +600,39 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->rx_compressed = b->rx_compressed; a->tx_compressed = b->tx_compressed; -}; +} + +static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *a, + const struct net_device_stats *b) +{ + a->rx_packets = b->rx_packets; + a->tx_packets = b->tx_packets; + a->rx_bytes = b->rx_bytes; + a->tx_bytes = b->tx_bytes; + a->rx_errors = b->rx_errors; + a->tx_errors = b->tx_errors; + a->rx_dropped = b->rx_dropped; + a->tx_dropped = b->tx_dropped; + + a->multicast = b->multicast; + a->collisions = b->collisions; + + a->rx_length_errors = b->rx_length_errors; + a->rx_over_errors = b->rx_over_errors; + a->rx_crc_errors = b->rx_crc_errors; + a->rx_frame_errors = b->rx_frame_errors; + a->rx_fifo_errors = b->rx_fifo_errors; + a->rx_missed_errors = b->rx_missed_errors; + + a->tx_aborted_errors = b->tx_aborted_errors; + a->tx_carrier_errors = b->tx_carrier_errors; + a->tx_fifo_errors = b->tx_fifo_errors; + a->tx_heartbeat_errors = b->tx_heartbeat_errors; + a->tx_window_errors = b->tx_window_errors; + + a->rx_compressed = b->rx_compressed; + a->tx_compressed = b->tx_compressed; +} static inline int rtnl_vfinfo_size(const struct net_device *dev) { @@ -698,6 +730,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, stats = dev_get_stats(dev); copy_rtnl_link_stats(nla_data(attr), stats); + attr = nla_reserve(skb, IFLA_STATS64, + sizeof(struct rtnl_link_stats64)); + if (attr == NULL) + goto nla_put_failure; + + stats = dev_get_stats(dev); + copy_rtnl_link_stats64(nla_data(attr), stats); + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { int i; struct ifla_vf_info ivi; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 93c4e06..bdea0ef 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -534,6 +534,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; skb_dst_set(new, dst_clone(skb_dst(old))); + new->rxhash = old->rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -581,6 +582,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(len); C(data_len); C(mac_len); + C(rxhash); n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; diff --git a/net/core/sock.c b/net/core/sock.c index fcd397a..c5812bb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog(sk, skb)) { + bh_unlock_sock(sk); + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } + bh_unlock_sock(sk); out: sock_put(sk); @@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_lock_init(newsk); bh_lock_sock(newsk); newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_backlog.len = 0; atomic_set(&newsk->sk_rmem_alloc, 0); /* @@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk) bh_lock_sock(sk); } while ((skb = sk->sk_backlog.head) != NULL); + + /* + * Doing the zeroing here guarantee we can not loop forever + * while a wild producer attempts to flood us. + */ + sk->sk_backlog.len = 0; } /** @@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); @@ -2276,7 +2288,8 @@ out_free_request_sock_slab: prot->rsk_prot->slab = NULL; } out_free_request_sock_slab_name: - kfree(prot->rsk_prot->slab_name); + if (prot->rsk_prot) + kfree(prot->rsk_prot->slab_name); out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b195c4f..4071eaf 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -998,11 +998,11 @@ static struct inet_protosw dccp_v4_protosw = { static int __net_init dccp_v4_init_net(struct net *net) { - int err; + if (dccp_hashinfo.bhash == NULL) + return -ESOCKTNOSUPPORT; - err = inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, - SOCK_DCCP, IPPROTO_DCCP, net); - return err; + return inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, + SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v4_exit_net(struct net *net) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 1aec634..af3394d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1191,11 +1191,11 @@ static struct inet_protosw dccp_v6_protosw = { static int __net_init dccp_v6_init_net(struct net *net) { - int err; + if (dccp_hashinfo.bhash == NULL) + return -ESOCKTNOSUPPORT; - err = inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6, - SOCK_DCCP, IPPROTO_DCCP, net); - return err; + return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6, + SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v6_exit_net(struct net *net) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index af226a0..0d508c3 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0ef7061..aa4cef3 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1036,7 +1036,7 @@ static int __init dccp_init(void) FIELD_SIZEOF(struct sk_buff, cb)); rc = percpu_counter_init(&dccp_orphan_count, 0); if (rc) - goto out; + goto out_fail; rc = -ENOBUFS; inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = @@ -1125,8 +1125,9 @@ static int __init dccp_init(void) goto out_sysctl_exit; dccp_timestamping_init(); -out: - return rc; + + return 0; + out_sysctl_exit: dccp_sysctl_exit(); out_ackvec_exit: @@ -1135,18 +1136,19 @@ out_free_dccp_mib: dccp_mib_exit(); out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); - dccp_hashinfo.bhash = NULL; out_free_dccp_locks: inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); - dccp_hashinfo.ehash = NULL; out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); - dccp_hashinfo.bind_bucket_cachep = NULL; out_free_percpu: percpu_counter_destroy(&dccp_orphan_count); - goto out; +out_fail: + dccp_hashinfo.bhash = NULL; + dccp_hashinfo.ehash = NULL; + dccp_hashinfo.bind_bucket_cachep = NULL; + return rc; } static void __exit dccp_fini(void) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 0c94a1a..c9a1c68 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -587,9 +587,15 @@ choice config DEFAULT_HTCP bool "Htcp" if TCP_CONG_HTCP=y + config DEFAULT_HYBLA + bool "Hybla" if TCP_CONG_HYBLA=y + config DEFAULT_VEGAS bool "Vegas" if TCP_CONG_VEGAS=y + config DEFAULT_VENO + bool "Veno" if TCP_CONG_VENO=y + config DEFAULT_WESTWOOD bool "Westwood" if TCP_CONG_WESTWOOD=y @@ -610,8 +616,10 @@ config DEFAULT_TCP_CONG default "bic" if DEFAULT_BIC default "cubic" if DEFAULT_CUBIC default "htcp" if DEFAULT_HTCP + default "hybla" if DEFAULT_HYBLA default "vegas" if DEFAULT_VEGAS default "westwood" if DEFAULT_WESTWOOD + default "veno" if DEFAULT_VENO default "reno" if DEFAULT_RENO default "cubic" diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c0c5274..f47c9f7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1144,12 +1144,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (saddr) memcpy(&iph->saddr, saddr, 4); - - if (daddr) { + if (daddr) memcpy(&iph->daddr, daddr, 4); - return t->hlen; - } - if (iph->daddr && !ipv4_is_multicast(iph->daddr)) + if (iph->daddr) return t->hlen; return -t->hlen; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 10a6a60..6789092 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -187,6 +187,16 @@ struct ic_device { static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ static struct net_device *ic_dev __initdata = NULL; /* Selected device */ +static bool __init ic_device_match(struct net_device *dev) +{ + if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : + (!(dev->flags & IFF_LOOPBACK) && + (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && + strncmp(dev->name, "dummy", 5))) + return true; + return false; +} + static int __init ic_open_devs(void) { struct ic_device *d, **last; @@ -207,10 +217,7 @@ static int __init ic_open_devs(void) for_each_netdev(&init_net, dev) { if (dev->flags & IFF_LOOPBACK) continue; - if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : - (!(dev->flags & IFF_LOOPBACK) && - (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && - strncmp(dev->name, "dummy", 5))) { + if (ic_device_match(dev)) { int able = 0; if (dev->mtu >= 364) able |= IC_BOOTP; @@ -228,7 +235,7 @@ static int __init ic_open_devs(void) } if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) { rtnl_unlock(); - return -1; + return -ENOMEM; } d->dev = dev; *last = d; @@ -253,7 +260,7 @@ static int __init ic_open_devs(void) printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); else printk(KERN_ERR "IP-Config: No network devices available.\n"); - return -1; + return -ENODEV; } return 0; } @@ -1303,6 +1310,32 @@ __be32 __init root_nfs_parse_addr(char *name) return addr; } +#define DEVICE_WAIT_MAX 12 /* 12 seconds */ + +static int __init wait_for_devices(void) +{ + int i; + + msleep(CONF_PRE_OPEN); + for (i = 0; i < DEVICE_WAIT_MAX; i++) { + struct net_device *dev; + int found = 0; + + rtnl_lock(); + for_each_netdev(&init_net, dev) { + if (ic_device_match(dev)) { + found = 1; + break; + } + } + rtnl_unlock(); + if (found) + return 0; + ssleep(1); + } + return -ENODEV; +} + /* * IP Autoconfig dispatcher. */ @@ -1313,6 +1346,7 @@ static int __init ip_auto_config(void) #ifdef IPCONFIG_DYNAMIC int retries = CONF_OPEN_RETRIES; #endif + int err; #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1325,12 +1359,15 @@ static int __init ip_auto_config(void) #ifdef IPCONFIG_DYNAMIC try_try_again: #endif - /* Give hardware a chance to settle */ - msleep(CONF_PRE_OPEN); + /* Wait for devices to appear */ + err = wait_for_devices(); + if (err) + return err; /* Setup all network devices */ - if (ic_open_devs() < 0) - return -1; + err = ic_open_devs(); + if (err) + return err; /* Give drivers a chance to settle */ ssleep(CONF_POST_OPEN); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 242ed23..4f1f337 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -249,6 +249,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), + SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), + SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b2ba558..a770df24 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); static int rt_garbage_collect(struct dst_ops *ops); -static void rt_emergency_hash_rebuild(struct net *net); static struct dst_ops ipv4_dst_ops = { @@ -780,11 +779,30 @@ static void rt_do_flush(int process_context) #define FRACT_BITS 3 #define ONE (1UL << FRACT_BITS) +/* + * Given a hash chain and an item in this hash chain, + * find if a previous entry has the same hash_inputs + * (but differs on tos, mark or oif) + * Returns 0 if an alias is found. + * Returns ONE if rth has no alias before itself. + */ +static int has_noalias(const struct rtable *head, const struct rtable *rth) +{ + const struct rtable *aux = head; + + while (aux != rth) { + if (compare_hash_inputs(&aux->fl, &rth->fl)) + return 0; + aux = aux->u.dst.rt_next; + } + return ONE; +} + static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, *aux, **rthp; + struct rtable *rth, **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; unsigned long delta; @@ -835,15 +853,7 @@ nofree: * attributes don't unfairly skew * the length computation */ - for (aux = rt_hash_table[i].chain;;) { - if (aux == rth) { - length += ONE; - break; - } - if (compare_hash_inputs(&aux->fl, &rth->fl)) - break; - aux = aux->u.dst.rt_next; - } + length += has_noalias(rt_hash_table[i].chain, rth); continue; } } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) @@ -922,10 +932,8 @@ static void rt_secret_rebuild_oneshot(struct net *net) { del_timer_sync(&net->ipv4.rt_secret_timer); rt_cache_invalidate(net); - if (ip_rt_secret_interval) { - net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); - } + if (ip_rt_secret_interval) + mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } static void rt_emergency_hash_rebuild(struct net *net) @@ -1073,6 +1081,21 @@ work_done: out: return 0; } +/* + * Returns number of entries in a hash chain that have different hash_inputs + */ +static int slow_chain_length(const struct rtable *head) +{ + int length = 0; + const struct rtable *rth = head; + + while (rth) { + length += has_noalias(head, rth); + rth = rth->u.dst.rt_next; + } + return length >> FRACT_BITS; +} + static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp, struct sk_buff *skb) { @@ -1185,7 +1208,8 @@ restart: rt_free(cand); } } else { - if (chain_length > rt_chain_length_max) { + if (chain_length > rt_chain_length_max && + slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { struct net *net = dev_net(rt->u.dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; if (!rt_caching(dev_net(rt->u.dst.dev))) { @@ -3077,22 +3101,20 @@ static void rt_secret_reschedule(int old) rtnl_lock(); for_each_net(net) { int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); + long time; if (!new) continue; if (deleted) { - long time = net->ipv4.rt_secret_timer.expires - jiffies; + time = net->ipv4.rt_secret_timer.expires - jiffies; if (time <= 0 || (time += diff) <= 0) time = 0; - - net->ipv4.rt_secret_timer.expires = time; } else - net->ipv4.rt_secret_timer.expires = new; + time = new; - net->ipv4.rt_secret_timer.expires += jiffies; - add_timer(&net->ipv4.rt_secret_timer); + mod_timer(&net->ipv4.rt_secret_timer, jiffies + time); } rtnl_unlock(); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c3588b4..70df409 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1651,13 +1651,15 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; - if (iph->ttl < inet_sk(sk)->min_ttl) - goto discard_and_relse; - process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; + } + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset(skb); @@ -1682,8 +1684,11 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else - sk_add_backlog(sk, skb); + } else if (unlikely(sk_add_backlog(sk, skb))) { + bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); + goto discard_and_relse; + } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f206ee5..4199bc6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -728,7 +728,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4a1605d..f181b78 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2395,13 +2395,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; + int s_data_desired = 0; - skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); + if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) + s_data_desired = cvp->s_data_desired; + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC); if (skb == NULL) return NULL; @@ -2457,16 +2461,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); if (OPTION_COOKIE_EXTENSION & opts.options) { - const struct tcp_cookie_values *cvp = tp->cookie_values; - - if (cvp != NULL && - cvp->s_data_constant && - cvp->s_data_desired > 0) { - u8 *buf = skb_put(skb, cvp->s_data_desired); + if (s_data_desired) { + u8 *buf = skb_put(skb, s_data_desired); /* copy data directly from the listening socket. */ - memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); - TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; + memcpy(buf, cvp->s_data_payload, s_data_desired); + TCP_SKB_CB(skb)->end_seq += s_data_desired; } if (opts.hash_size > 0) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a17629b..b2e6bbc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -134,7 +134,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) } /* This function calculates a "timeout" which is equivalent to the timeout of a - * TCP connection after "boundary" unsucessful, exponentially backed-off + * TCP connection after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 608a544..7af756d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1371,8 +1371,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); + else if (sk_add_backlog(sk, skb)) { + bh_unlock_sock(sk); + goto drop; + } bh_unlock_sock(sk); return rc; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 67107d6..e4a1483 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; - xdst->u.rt.fl = rt->fl; + xdst->u.rt.fl = *fl; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 88fd8c5..3381b43 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1380,6 +1380,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (dad_failed) ifp->flags |= IFA_F_DADFAILED; spin_unlock_bh(&ifp->lock); + if (dad_failed) + ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); #ifdef CONFIG_IPV6_PRIVACY } else if (ifp->flags&IFA_F_TEMPORARY) { @@ -2615,7 +2617,7 @@ static void addrconf_bonding_change(struct net_device *dev, unsigned long event) static int addrconf_ifdown(struct net_device *dev, int how) { struct inet6_dev *idev; - struct inet6_ifaddr *ifa, **bifa; + struct inet6_ifaddr *ifa, *keep_list, **bifa; struct net *net = dev_net(dev); int i; @@ -2649,11 +2651,11 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&addrconf_hash_lock); while ((ifa = *bifa) != NULL) { if (ifa->idev == idev && - (how || !(ifa->flags&IFA_F_PERMANENT))) { + (how || !(ifa->flags&IFA_F_PERMANENT) || + ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { *bifa = ifa->lst_next; ifa->lst_next = NULL; - addrconf_del_timer(ifa); - in6_ifa_put(ifa); + __in6_ifa_put(ifa); continue; } bifa = &ifa->lst_next; @@ -2689,31 +2691,51 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } #endif - bifa = &idev->addr_list; - while ((ifa = *bifa) != NULL) { - if (how == 0 && (ifa->flags&IFA_F_PERMANENT)) { - /* Retain permanent address on admin down */ + keep_list = NULL; + bifa = &keep_list; + while ((ifa = idev->addr_list) != NULL) { + idev->addr_list = ifa->if_next; + ifa->if_next = NULL; + + addrconf_del_timer(ifa); + + /* If just doing link down, and address is permanent + and not link-local, then retain it. */ + if (how == 0 && + (ifa->flags&IFA_F_PERMANENT) && + !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { + + /* Move to holding list */ + *bifa = ifa; bifa = &ifa->if_next; - /* Restart DAD if needed when link comes back up */ - if ( !((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || - idev->cnf.accept_dad <= 0 || - (ifa->flags & IFA_F_NODAD))) - ifa->flags |= IFA_F_TENTATIVE; - } else { - *bifa = ifa->if_next; - ifa->if_next = NULL; + /* If not doing DAD on this address, just keep it. */ + if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || + idev->cnf.accept_dad <= 0 || + (ifa->flags & IFA_F_NODAD)) + continue; + /* If it was tentative already, no need to notify */ + if (ifa->flags & IFA_F_TENTATIVE) + continue; + + /* Flag it for later restoration when link comes up */ + ifa->flags |= IFA_F_TENTATIVE; + in6_ifa_hold(ifa); + } else { ifa->dead = 1; - write_unlock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); - __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); - in6_ifa_put(ifa); + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + in6_ifa_put(ifa); - write_lock_bh(&idev->lock); - } + write_lock_bh(&idev->lock); } + + idev->addr_list = keep_list; + write_unlock_bh(&idev->lock); /* Step 5: Discard multicast list */ @@ -2739,28 +2761,29 @@ static int addrconf_ifdown(struct net_device *dev, int how) static void addrconf_rs_timer(unsigned long data) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + struct inet6_dev *idev = ifp->idev; - if (ifp->idev->cnf.forwarding) + read_lock(&idev->lock); + if (idev->dead || !(idev->if_flags & IF_READY)) goto out; - if (ifp->idev->if_flags & IF_RA_RCVD) { - /* - * Announcement received after solicitation - * was sent - */ + if (idev->cnf.forwarding) + goto out; + + /* Announcement received after solicitation was sent */ + if (idev->if_flags & IF_RA_RCVD) goto out; - } spin_lock(&ifp->lock); - if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) { + if (ifp->probes++ < idev->cnf.rtr_solicits) { /* The wait after the last probe can be shorter */ addrconf_mod_timer(ifp, AC_RS, - (ifp->probes == ifp->idev->cnf.rtr_solicits) ? - ifp->idev->cnf.rtr_solicit_delay : - ifp->idev->cnf.rtr_solicit_interval); + (ifp->probes == idev->cnf.rtr_solicits) ? + idev->cnf.rtr_solicit_delay : + idev->cnf.rtr_solicit_interval); spin_unlock(&ifp->lock); - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); } else { spin_unlock(&ifp->lock); /* @@ -2768,10 +2791,11 @@ static void addrconf_rs_timer(unsigned long data) * assumption any longer. */ printk(KERN_DEBUG "%s: no IPv6 routers present\n", - ifp->idev->dev->name); + idev->dev->name); } out: + read_unlock(&idev->lock); in6_ifa_put(ifp); } @@ -2850,9 +2874,9 @@ static void addrconf_dad_timer(unsigned long data) struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; - read_lock_bh(&idev->lock); - if (idev->dead) { - read_unlock_bh(&idev->lock); + read_lock(&idev->lock); + if (idev->dead || !(idev->if_flags & IF_READY)) { + read_unlock(&idev->lock); goto out; } @@ -2864,7 +2888,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - read_unlock_bh(&idev->lock); + read_unlock(&idev->lock); addrconf_dad_completed(ifp); @@ -2874,7 +2898,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->probes--; addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); spin_unlock(&ifp->lock); - read_unlock_bh(&idev->lock); + read_unlock(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 551882b..5e463c43 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -84,18 +84,11 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; - unsigned int srcprefs = 0; - - if (flags & RT6_LOOKUP_F_SRCPREF_TMP) - srcprefs |= IPV6_PREFER_SRC_TMP; - if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) - srcprefs |= IPV6_PREFER_SRC_PUBLIC; - if (flags & RT6_LOOKUP_F_SRCPREF_COA) - srcprefs |= IPV6_PREFER_SRC_COA; if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->u.dst)->dev, - &flp->fl6_dst, srcprefs, + &flp->fl6_dst, + rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b08879e..52cd3ef 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -819,15 +819,8 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; - else if (sk) { - unsigned int prefs = inet6_sk(sk)->srcprefs; - if (prefs & IPV6_PREFER_SRC_TMP) - flags |= RT6_LOOKUP_F_SRCPREF_TMP; - if (prefs & IPV6_PREFER_SRC_PUBLIC) - flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; - if (prefs & IPV6_PREFER_SRC_COA) - flags |= RT6_LOOKUP_F_SRCPREF_COA; - } + else if (sk) + flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6963a6b..9b6dbba 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,8 +1740,11 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else - sk_add_backlog(sk, skb); + } else if (unlikely(sk_add_backlog(sk, skb))) { + bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); + goto discard_and_relse; + } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 52b8347..3c0c9c7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -583,16 +583,20 @@ static void flush_stack(struct sock **stack, unsigned int count, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); + else if (sk_add_backlog(sk, skb1)) { + kfree_skb(skb1); + bh_unlock_sock(sk); + goto drop; + } bh_unlock_sock(sk); - } else { - atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_INERRORS, IS_UDPLITE(sk)); + continue; } +drop: + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, IS_UDPLITE(sk)); } } /* @@ -754,8 +758,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); + else if (sk_add_backlog(sk, skb)) { + atomic_inc(&sk->sk_drops); + bh_unlock_sock(sk); + sock_put(sk); + goto discard; + } bh_unlock_sock(sk); sock_put(sk); return 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dbdc696..ae18165 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -116,7 +116,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rt6_info *rt = (struct rt6_info*)xdst->route; diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 019c780..86d6985 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb) llc_conn_state_process(sk, skb); else { llc_set_backlog_type(skb, LLC_EVENT); - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); } } } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index a8dde9b..a12144d 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -827,7 +827,8 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - sk_add_backlog(sk, skb); + if (sk_add_backlog(sk, skb)) + goto drop_unlock; } out: bh_unlock_sock(sk); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9affe2c..b4ddb2f 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -48,20 +48,24 @@ static ssize_t ieee80211_if_write( ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) { u8 *buf; - ssize_t ret = -ENODEV; + ssize_t ret; - buf = kzalloc(count, GFP_KERNEL); + buf = kmalloc(count, GFP_KERNEL); if (!buf) return -ENOMEM; + ret = -EFAULT; if (copy_from_user(buf, userbuf, count)) - return -EFAULT; + goto freebuf; + ret = -ENODEV; rtnl_lock(); if (sdata->dev->reg_state == NETREG_REGISTERED) ret = (*write)(sdata, buf, count); rtnl_unlock(); +freebuf: + kfree(buf); return ret; } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index bc4e20e..1a29c4a 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -744,7 +744,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; default: /* should not get here, PLINK_BLOCKED is dealt with at the - * beggining of the function + * beginning of the function */ spin_unlock_bh(&sta->lock); break; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 41812a1..be5f723 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -177,7 +177,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(sdata, bssid); if (sta) rate_control_rate_update(local, sband, sta, - IEEE80211_RC_HT_CHANGED); + IEEE80211_RC_HT_CHANGED, + local->oper_channel_type); rcu_read_unlock(); } @@ -435,10 +436,12 @@ static void ieee80211_enable_ps(struct ieee80211_local *local, if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) ieee80211_send_nullfunc(local, sdata, 1); - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { - conf->flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - } + if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && + (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) + return; + + conf->flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } } @@ -557,7 +560,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) ieee80211_send_nullfunc(local, sdata, 1); - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) || + if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && + (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) || (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; local->hw.conf.flags |= IEEE80211_CONF_PS; @@ -1893,8 +1897,20 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); if (ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); - return -EALREADY; + if (!req->prev_bssid || + memcmp(req->prev_bssid, ifmgd->associated->bssid, + ETH_ALEN)) { + /* + * We are already associated and the request was not a + * reassociation request from the current BSS, so + * reject it. + */ + mutex_unlock(&ifmgd->mtx); + return -EALREADY; + } + + /* Trying to reassociate - clear previous association state */ + ieee80211_set_disassoc(sdata); } mutex_unlock(&ifmgd->mtx); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index b6108bc..065a9619 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -66,7 +66,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) static inline void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, u32 changed) + struct sta_info *sta, u32 changed, + enum nl80211_channel_type oper_chan_type) { struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; @@ -74,7 +75,7 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, if (ref && ref->ops->rate_update) ref->ops->rate_update(ref->priv, sband, ista, - priv_sta, changed); + priv_sta, changed, oper_chan_type); } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 211c475..56422d8 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -434,6 +434,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); + mutex_unlock(&local->sta_mtx); rcu_read_lock(); err = -EEXIST; goto out_free; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 18d77b5..6ac28ef 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -314,8 +314,39 @@ config NETFILTER_XTABLES if NETFILTER_XTABLES +comment "Xtables combined modules" + +config NETFILTER_XT_MARK + tristate 'nfmark target and match support' + default m if NETFILTER_ADVANCED=n + ---help--- + This option adds the "MARK" target and "mark" match. + + Netfilter mark matching allows you to match packets based on the + "nfmark" value in the packet. + The target allows you to create rules in the "mangle" table which alter + the netfilter mark (nfmark) field associated with the packet. + + Prior to routing, the nfmark can influence the routing method (see + "Use netfilter MARK value as routing key") and can also be used by + other subsystems to change their behavior. + +config NETFILTER_XT_CONNMARK + tristate 'ctmark target and match support' + depends on NF_CONNTRACK + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_MARK + ---help--- + This option adds the "CONNMARK" target and "connmark" match. + + Netfilter allows you to store a mark value per connection (a.k.a. + ctmark), similarly to the packet mark (nfmark). Using this + target and match, you can set and match on this mark. + # alphabetically ordered list of targets +comment "Xtables targets" + config NETFILTER_XT_TARGET_CLASSIFY tristate '"CLASSIFY" target support' depends on NETFILTER_ADVANCED @@ -332,15 +363,11 @@ config NETFILTER_XT_TARGET_CONNMARK tristate '"CONNMARK" target support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - This option adds a `CONNMARK' target, which allows one to manipulate - the connection mark value. Similar to the MARK target, but - affects the connection mark value rather than the packet mark value. - - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. The module will be called - ipt_CONNMARK. If unsure, say `N'. + select NETFILTER_XT_CONNMARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module). config NETFILTER_XT_TARGET_CONNSECMARK tristate '"CONNSECMARK" target support' @@ -423,16 +450,12 @@ config NETFILTER_XT_TARGET_LED config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' - default m if NETFILTER_ADVANCED=n - help - This option adds a `MARK' target, which allows you to create rules - in the `mangle' table which alter the netfilter mark (nfmark) field - associated with the packet prior to routing. This can change - the routing method (see `Use netfilter MARK value as routing - key') and can also be used by other subsystems to change their - behavior. - - To compile it as a module, choose M here. If unsure, say N. + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). config NETFILTER_XT_TARGET_NFLOG tristate '"NFLOG" target support' @@ -552,6 +575,10 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP This option adds a "TCPOPTSTRIP" target, which allows you to strip TCP options from TCP packets. +# alphabetically ordered list of matches + +comment "Xtables matches" + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK @@ -602,14 +629,11 @@ config NETFILTER_XT_MATCH_CONNMARK tristate '"connmark" connection mark match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - This option adds a `connmark' match, which allows you to match the - connection mark value previously set for the session by `CONNMARK'. - - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. The module will be called - ipt_connmark. If unsure, say `N'. + select NETFILTER_XT_CONNMARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module). config NETFILTER_XT_MATCH_CONNTRACK tristate '"conntrack" connection tracking match support' @@ -733,13 +757,12 @@ config NETFILTER_XT_MATCH_MAC config NETFILTER_XT_MATCH_MARK tristate '"mark" match support' - default m if NETFILTER_ADVANCED=n - help - Netfilter mark matching allows you to match packets based on the - `nfmark' value in the packet. This can be set by the MARK target - (see below). - - To compile it as a module, choose M here. If unsure, say N. + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). config NETFILTER_XT_MATCH_MULTIPORT tristate '"multiport" Multiple port match support' @@ -836,13 +859,6 @@ config NETFILTER_XT_MATCH_RECENT Short options are available by using 'iptables -m recent -h' Official Website: <http://snowman.net/projects/ipt_recent/> -config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - bool 'Enable obsolete /proc/net/ipt_recent' - depends on NETFILTER_XT_MATCH_RECENT && PROC_FS - ---help--- - This option enables the old /proc/net/ipt_recent interface, - which has been obsoleted by /proc/net/xt_recent. - config NETFILTER_XT_MATCH_SCTP tristate '"sctp" protocol match support (EXPERIMENTAL)' depends on EXPERIMENTAL diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index f873644..cd31afe 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -40,15 +40,17 @@ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o +# combos +obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o +obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o + # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o -obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o -obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o @@ -64,7 +66,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o -obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o @@ -76,7 +77,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o -obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 8dd75d9..c6cd1b8 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request); * tabs, spaces and continuation lines, which are treated as a single whitespace * character. * - * Some headers may appear multiple times. A comma seperated list of values is + * Some headers may appear multiple times. A comma separated list of values is * equivalent to multiple headers. */ static const struct sip_header ct_sip_hdrs[] = { @@ -421,7 +421,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_get_header); -/* Get next header field in a list of comma seperated values */ +/* Get next header field in a list of comma separated values */ static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, enum sip_header_types type, diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8eb0cc2..0794f9a 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -18,12 +18,9 @@ #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> -#include <linux/major.h> -#include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> -#include <linux/fcntl.h> #include <linux/skbuff.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c deleted file mode 100644 index 5934570..0000000 --- a/net/netfilter/xt_CONNMARK.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * xt_CONNMARK - Netfilter module to modify the connection mark values - * - * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> - * by Henrik Nordstrom <hno@marasystems.com> - * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt <jengelh@computergmbh.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/checksum.h> - -MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); -MODULE_DESCRIPTION("Xtables: connection mark modification"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_CONNMARK"); -MODULE_ALIAS("ip6t_CONNMARK"); - -#include <linux/netfilter/x_tables.h> -#include <linux/netfilter/xt_CONNMARK.h> -#include <net/netfilter/nf_conntrack_ecache.h> - -static unsigned int -connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_connmark_tginfo1 *info = par->targinfo; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - u_int32_t newmark; - - ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) - return XT_CONTINUE; - - switch (info->mode) { - case XT_CONNMARK_SET: - newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; - if (ct->mark != newmark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_SAVE: - newmark = (ct->mark & ~info->ctmask) ^ - (skb->mark & info->nfmask); - if (ct->mark != newmark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_RESTORE: - newmark = (skb->mark & ~info->nfmask) ^ - (ct->mark & info->ctmask); - skb->mark = newmark; - break; - } - - return XT_CONTINUE; -} - -static bool connmark_tg_check(const struct xt_tgchk_param *par) -{ - if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "cannot load conntrack support for " - "proto=%u\n", par->family); - return false; - } - return true; -} - -static void connmark_tg_destroy(const struct xt_tgdtor_param *par) -{ - nf_ct_l3proto_module_put(par->family); -} - -static struct xt_target connmark_tg_reg __read_mostly = { - .name = "CONNMARK", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_tg_check, - .target = connmark_tg, - .targetsize = sizeof(struct xt_connmark_tginfo1), - .destroy = connmark_tg_destroy, - .me = THIS_MODULE, -}; - -static int __init connmark_tg_init(void) -{ - return xt_register_target(&connmark_tg_reg); -} - -static void __exit connmark_tg_exit(void) -{ - xt_unregister_target(&connmark_tg_reg); -} - -module_init(connmark_tg_init); -module_exit(connmark_tg_exit); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 61c50fa..fda603e 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -37,13 +37,13 @@ static unsigned int xt_ct_target(struct sk_buff *skb, static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) { - if (par->family == AF_INET) { + if (par->family == NFPROTO_IPV4) { const struct ipt_entry *e = par->entryinfo; if (e->ip.invflags & IPT_INV_PROTO) return 0; return e->ip.proto; - } else if (par->family == AF_INET6) { + } else if (par->family == NFPROTO_IPV6) { const struct ip6t_entry *e = par->entryinfo; if (e->ipv6.invflags & IP6T_INV_PROTO) diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c deleted file mode 100644 index 225f8d1..0000000 --- a/net/netfilter/xt_MARK.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * xt_MARK - Netfilter module to modify the NFMARK field of an skb - * - * (C) 1999-2001 Marc Boucher <marc@mbsi.ca> - * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt <jengelh@computergmbh.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/checksum.h> - -#include <linux/netfilter/x_tables.h> -#include <linux/netfilter/xt_MARK.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); -MODULE_DESCRIPTION("Xtables: packet mark modification"); -MODULE_ALIAS("ipt_MARK"); -MODULE_ALIAS("ip6t_MARK"); - -static unsigned int -mark_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_mark_tginfo2 *info = par->targinfo; - - skb->mark = (skb->mark & ~info->mask) ^ info->mark; - return XT_CONTINUE; -} - -static struct xt_target mark_tg_reg __read_mostly = { - .name = "MARK", - .revision = 2, - .family = NFPROTO_UNSPEC, - .target = mark_tg, - .targetsize = sizeof(struct xt_mark_tginfo2), - .me = THIS_MODULE, -}; - -static int __init mark_tg_init(void) -{ - return xt_register_target(&mark_tg_reg); -} - -static void __exit mark_tg_exit(void) -{ - xt_unregister_target(&mark_tg_reg); -} - -module_init(mark_tg_init); -module_exit(mark_tg_exit); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 12dcd70..a37e216 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -49,17 +49,6 @@ static u32 hash_v4(const struct sk_buff *skb) return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval); } -static unsigned int -nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_NFQ_info_v1 *info = par->targinfo; - u32 queue = info->queuenum; - - if (info->queues_total > 1) - queue = hash_v4(skb) % info->queues_total + queue; - return NF_QUEUE_NR(queue); -} - #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static u32 hash_v6(const struct sk_buff *skb) { @@ -73,18 +62,24 @@ static u32 hash_v6(const struct sk_buff *skb) return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval); } +#endif static unsigned int -nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par) +nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; - if (info->queues_total > 1) - queue = hash_v6(skb) % info->queues_total + queue; + if (info->queues_total > 1) { + if (par->target->family == NFPROTO_IPV4) + queue = hash_v4(skb) % info->queues_total + queue; +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + else if (par->target->family == NFPROTO_IPV6) + queue = hash_v6(skb) % info->queues_total + queue; +#endif + } return NF_QUEUE_NR(queue); } -#endif static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) { @@ -119,23 +114,12 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", .revision = 1, - .family = NFPROTO_IPV4, - .checkentry = nfqueue_tg_v1_check, - .target = nfqueue_tg4_v1, - .targetsize = sizeof(struct xt_NFQ_info_v1), - .me = THIS_MODULE, - }, -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - { - .name = "NFQUEUE", - .revision = 1, - .family = NFPROTO_IPV6, + .family = NFPROTO_UNSPEC, .checkentry = nfqueue_tg_v1_check, - .target = nfqueue_tg6_v1, + .target = nfqueue_tg_v1, .targetsize = sizeof(struct xt_NFQ_info_v1), .me = THIS_MODULE, }, -#endif }; static int __init nfqueue_tg_init(void) diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 9dd8c8e..e8b5760 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -3,7 +3,6 @@ * * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org> * Copyright © CC Computer Consultants GmbH, 2007 - * Contact: Jan Engelhardt <jengelh@computergmbh.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -136,7 +135,7 @@ static void __exit tcpoptstrip_tg_exit(void) module_init(tcpoptstrip_tg_init); module_exit(tcpoptstrip_tg_exit); -MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@computergmbh.de>"); +MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: TCP option stripping"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TCPOPTSTRIP"); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 26997ce..9e624af 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -5,7 +5,6 @@ * Nov 2002: Martin Bene <martin.bene@icomedias.com>: * only ignore TIME_WAIT or gone connections * (C) CC Computer Consultants GmbH, 2007 - * Contact: <jengelh@computergmbh.de> * * based on ... * diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 122aa8b..97465a4 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -1,10 +1,10 @@ /* - * xt_connmark - Netfilter module to match connection mark values + * xt_connmark - Netfilter module to operate on connection marks * * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> * by Henrik Nordstrom <hno@marasystems.com> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt <jengelh@computergmbh.de> + * Jan Engelhardt <jengelh@medozas.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,15 +24,71 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connmark.h> MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); -MODULE_DESCRIPTION("Xtables: connection mark match"); +MODULE_DESCRIPTION("Xtables: connection mark operations"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_CONNMARK"); +MODULE_ALIAS("ip6t_CONNMARK"); MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); +static unsigned int +connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_connmark_tginfo1 *info = par->targinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + u_int32_t newmark; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return XT_CONTINUE; + + switch (info->mode) { + case XT_CONNMARK_SET: + newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; + if (ct->mark != newmark) { + ct->mark = newmark; + nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; + case XT_CONNMARK_SAVE: + newmark = (ct->mark & ~info->ctmask) ^ + (skb->mark & info->nfmask); + if (ct->mark != newmark) { + ct->mark = newmark; + nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; + case XT_CONNMARK_RESTORE: + newmark = (skb->mark & ~info->nfmask) ^ + (ct->mark & info->ctmask); + skb->mark = newmark; + break; + } + + return XT_CONTINUE; +} + +static bool connmark_tg_check(const struct xt_tgchk_param *par) +{ + if (nf_ct_l3proto_try_module_get(par->family) < 0) { + printk(KERN_WARNING "cannot load conntrack support for " + "proto=%u\n", par->family); + return false; + } + return true; +} + +static void connmark_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_l3proto_module_put(par->family); +} + static bool connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { @@ -62,6 +118,17 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par) nf_ct_l3proto_module_put(par->family); } +static struct xt_target connmark_tg_reg __read_mostly = { + .name = "CONNMARK", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = connmark_tg_check, + .target = connmark_tg, + .targetsize = sizeof(struct xt_connmark_tginfo1), + .destroy = connmark_tg_destroy, + .me = THIS_MODULE, +}; + static struct xt_match connmark_mt_reg __read_mostly = { .name = "connmark", .revision = 1, @@ -75,12 +142,23 @@ static struct xt_match connmark_mt_reg __read_mostly = { static int __init connmark_mt_init(void) { - return xt_register_match(&connmark_mt_reg); + int ret; + + ret = xt_register_target(&connmark_tg_reg); + if (ret < 0) + return ret; + ret = xt_register_match(&connmark_mt_reg); + if (ret < 0) { + xt_unregister_target(&connmark_tg_reg); + return ret; + } + return 0; } static void __exit connmark_mt_exit(void) { xt_unregister_match(&connmark_mt_reg); + xt_unregister_target(&connmark_tg_reg); } module_init(connmark_mt_init); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d952806..ba9601a 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1,6 +1,6 @@ /* * xt_hashlimit - Netfilter module to limit the number of packets per time - * seperately for each hashbucket (sourceip/sourceport/dstip/dstport) + * separately for each hashbucket (sourceip/sourceport/dstip/dstport) * * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 @@ -36,7 +36,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match"); MODULE_ALIAS("ipt_hashlimit"); MODULE_ALIAS("ip6t_hashlimit"); diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 1db07d8..035c468 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -18,9 +18,20 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); -MODULE_DESCRIPTION("Xtables: packet mark match"); +MODULE_DESCRIPTION("Xtables: packet mark operations"); MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); +MODULE_ALIAS("ipt_MARK"); +MODULE_ALIAS("ip6t_MARK"); + +static unsigned int +mark_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_mark_tginfo2 *info = par->targinfo; + + skb->mark = (skb->mark & ~info->mask) ^ info->mark; + return XT_CONTINUE; +} static bool mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) @@ -30,6 +41,15 @@ mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((skb->mark & info->mask) == info->mark) ^ info->invert; } +static struct xt_target mark_tg_reg __read_mostly = { + .name = "MARK", + .revision = 2, + .family = NFPROTO_UNSPEC, + .target = mark_tg, + .targetsize = sizeof(struct xt_mark_tginfo2), + .me = THIS_MODULE, +}; + static struct xt_match mark_mt_reg __read_mostly = { .name = "mark", .revision = 1, @@ -41,12 +61,23 @@ static struct xt_match mark_mt_reg __read_mostly = { static int __init mark_mt_init(void) { - return xt_register_match(&mark_mt_reg); + int ret; + + ret = xt_register_target(&mark_tg_reg); + if (ret < 0) + return ret; + ret = xt_register_match(&mark_mt_reg); + if (ret < 0) { + xt_unregister_target(&mark_tg_reg); + return ret; + } + return 0; } static void __exit mark_mt_exit(void) { xt_unregister_match(&mark_mt_reg); + xt_unregister_target(&mark_tg_reg); } module_init(mark_mt_init); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 7073dbb..d2e7c80 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -34,8 +34,8 @@ #include <linux/netfilter/xt_recent.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); -MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); +MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_recent"); MODULE_ALIAS("ip6t_recent"); @@ -83,9 +83,6 @@ struct recent_net { struct list_head tables; #ifdef CONFIG_PROC_FS struct proc_dir_entry *xt_recent; -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - struct proc_dir_entry *ipt_recent; -#endif #endif }; @@ -146,6 +143,25 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) t->entries--; } +/* + * Drop entries with timestamps older then 'time'. + */ +static void recent_entry_reap(struct recent_table *t, unsigned long time) +{ + struct recent_entry *e; + + /* + * The head of the LRU list is always the oldest entry. + */ + e = list_entry(t->lru_list.next, struct recent_entry, lru_list); + + /* + * The last time stamp is the most recent. + */ + if (time_after(time, e->stamps[e->index-1])) + recent_entry_remove(t, e); +} + static struct recent_entry * recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, u_int16_t family, u_int8_t ttl) @@ -272,6 +288,10 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) break; } } + + /* info->seconds must be non-zero */ + if (info->check_set & XT_RECENT_REAP) + recent_entry_reap(t, time); } if (info->check_set & XT_RECENT_SET || @@ -299,12 +319,20 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) get_random_bytes(&hash_rnd, sizeof(hash_rnd)); hash_rnd_inited = true; } + if (info->check_set & ~XT_RECENT_VALID_FLAGS) { + pr_info(KBUILD_MODNAME ": Unsupported user space flags " + "(%08x)\n", info->check_set); + return false; + } if (hweight8(info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE | XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) return false; if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && - (info->seconds || info->hit_count)) + (info->seconds || info->hit_count || + (info->check_set & XT_RECENT_MODIFIERS))) + return false; + if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return false; if (info->hit_count > ip_pkt_list_tot) { pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than " @@ -342,17 +370,6 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) } pde->uid = ip_list_uid; pde->gid = ip_list_gid; -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - pde = proc_create_data(t->name, ip_list_perms, recent_net->ipt_recent, - &recent_old_fops, t); - if (pde == NULL) { - remove_proc_entry(t->name, recent_net->xt_recent); - kfree(t); - goto out; - } - pde->uid = ip_list_uid; - pde->gid = ip_list_gid; -#endif #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); @@ -376,9 +393,6 @@ static void recent_mt_destroy(const struct xt_mtdtor_param *par) list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - remove_proc_entry(t->name, recent_net->ipt_recent); -#endif remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); @@ -470,84 +484,6 @@ static int recent_seq_open(struct inode *inode, struct file *file) return 0; } -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT -static int recent_old_seq_open(struct inode *inode, struct file *filp) -{ - static bool warned_of_old; - - if (unlikely(!warned_of_old)) { - printk(KERN_INFO KBUILD_MODNAME ": Use of /proc/net/ipt_recent" - " is deprecated; use /proc/net/xt_recent.\n"); - warned_of_old = true; - } - return recent_seq_open(inode, filp); -} - -static ssize_t recent_old_proc_write(struct file *file, - const char __user *input, - size_t size, loff_t *loff) -{ - const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); - struct recent_table *t = pde->data; - struct recent_entry *e; - char buf[sizeof("+255.255.255.255")], *c = buf; - union nf_inet_addr addr = {}; - int add; - - if (size > sizeof(buf)) - size = sizeof(buf); - if (copy_from_user(buf, input, size)) - return -EFAULT; - - c = skip_spaces(c); - - if (size - (c - buf) < 5) - return c - buf; - if (!strncmp(c, "clear", 5)) { - c += 5; - spin_lock_bh(&recent_lock); - recent_table_flush(t); - spin_unlock_bh(&recent_lock); - return c - buf; - } - - switch (*c) { - case '-': - add = 0; - c++; - break; - case '+': - c++; - default: - add = 1; - break; - } - addr.ip = in_aton(c); - - spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0); - if (e == NULL) { - if (add) - recent_entry_init(t, &addr, NFPROTO_IPV4, 0); - } else { - if (add) - recent_entry_update(t, e); - else - recent_entry_remove(t, e); - } - spin_unlock_bh(&recent_lock); - return size; -} - -static const struct file_operations recent_old_fops = { - .open = recent_old_seq_open, - .read = seq_read, - .write = recent_old_proc_write, - .release = seq_release_private, - .owner = THIS_MODULE, -}; -#endif - static ssize_t recent_mt_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) @@ -636,21 +572,11 @@ static int __net_init recent_proc_net_init(struct net *net) recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net); if (!recent_net->xt_recent) return -ENOMEM; -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - recent_net->ipt_recent = proc_mkdir("ipt_recent", net->proc_net); - if (!recent_net->ipt_recent) { - proc_net_remove(net, "xt_recent"); - return -ENOMEM; - } -#endif return 0; } static void __net_exit recent_proc_net_exit(struct net *net) { -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - proc_net_remove(net, "ipt_recent"); -#endif proc_net_remove(net, "xt_recent"); } #else diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 93acaa5..9a9c9a3 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -1,7 +1,6 @@ /* * xt_time * Copyright © CC Computer Consultants GmbH, 2007 - * Contact: <jengelh@computergmbh.de> * * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org> * This is a module which is used for time matching @@ -264,7 +263,7 @@ static void __exit time_mt_exit(void) module_init(time_mt_init); module_exit(time_mt_exit); -MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: time-based matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_time"); diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index 24a5276..d7c05f0 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -3,7 +3,6 @@ * * Original author: Don Cohen <don@isis.cs3-inc.com> * (C) CC Computer Consultants GmbH, 2007 - * Contact: <jengelh@computergmbh.de> */ #include <linux/module.h> @@ -117,7 +116,7 @@ static void __exit u32_mt_exit(void) module_init(u32_mt_init); module_exit(u32_mt_exit); -MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: arbitrary byte matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_u32"); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 031a5e6..1612d41 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1688,6 +1688,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, { switch (i->type) { case PACKET_MR_MULTICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_mc_add(dev, i->addr, i->alen, 0); else @@ -1700,6 +1702,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, return dev_set_allmulti(dev, what); break; case PACKET_MR_UNICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_unicast_add(dev, i->addr); else @@ -1734,7 +1738,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) goto done; err = -EINVAL; - if (mreq->mr_alen != dev->addr_len) + if (mreq->mr_alen > dev->addr_len) goto done; err = -ENOBUFS; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index c597cc5..5c6ae0c 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -107,8 +107,7 @@ static void phonet_device_destroy(struct net_device *dev) if (pnd) { u8 addr; - for (addr = find_first_bit(pnd->addrs, 64); addr < 64; - addr = find_next_bit(pnd->addrs, 64, 1+addr)) + for_each_set_bit(addr, pnd->addrs, 64) phonet_address_notify(RTM_DELADDR, dev, addr); kfree(pnd); } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 2e6c7eb..fe2e708 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -141,8 +141,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) continue; addr_idx = 0; - for (addr = find_first_bit(pnd->addrs, 64); addr < 64; - addr = find_next_bit(pnd->addrs, 64, 1+addr)) { + for_each_set_bit(addr, pnd->addrs, 64) { if (addr_idx++ < addr_start_idx) continue; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 853c52b..937ecda 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -159,7 +159,8 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, poll_wait(file, sk->sk_sleep, wait); - poll_wait(file, &rds_poll_waitq, wait); + if (rs->rs_seen_congestion) + poll_wait(file, &rds_poll_waitq, wait); read_lock_irqsave(&rs->rs_recv_lock, flags); if (!rs->rs_cong_monitor) { @@ -181,6 +182,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, mask |= (POLLOUT | POLLWRNORM); read_unlock_irqrestore(&rs->rs_recv_lock, flags); + /* clear state any time we wake a seen-congested socket */ + if (mask) + rs->rs_seen_congestion = 0; + return mask; } diff --git a/net/rds/cong.c b/net/rds/cong.c index 6d06cac..dd2711d 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -218,8 +218,6 @@ void rds_cong_queue_updates(struct rds_cong_map *map) spin_lock_irqsave(&rds_cong_lock, flags); list_for_each_entry(conn, &map->m_conn_list, c_map_item) { - if (conn->c_loopback) - continue; if (!test_and_set_bit(0, &conn->c_map_queued)) { rds_stats_inc(s_cong_update_queued); queue_delayed_work(rds_wq, &conn->c_send_w, 0); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 647cb8f..e1f124b 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -203,9 +203,10 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); break; default: - rds_ib_conn_error(conn, "RDS/IB: Fatal QP Event %u " + rdsdebug("Fatal QP Event %u " "- connection %pI4->%pI4, reconnecting\n", event->event, &conn->c_laddr, &conn->c_faddr); + rds_conn_drop(conn); break; } } diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 4b0da86..cfb1d90 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -234,8 +234,8 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) { flush_workqueue(rds_wq); rds_ib_flush_mr_pool(pool, 1); - BUG_ON(atomic_read(&pool->item_count)); - BUG_ON(atomic_read(&pool->free_pinned)); + WARN_ON(atomic_read(&pool->item_count)); + WARN_ON(atomic_read(&pool->free_pinned)); kfree(pool); } @@ -440,6 +440,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) /* FIXME we need a way to tell a r/w MR * from a r/o MR */ + BUG_ON(in_interrupt()); set_page_dirty(page); put_page(page); } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 04dc0d3..c338881 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -468,8 +468,8 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); rds_ib_stats_inc(s_ib_ack_send_failure); - /* Need to finesse this later. */ - BUG(); + + rds_ib_conn_error(ic->conn, "sending ack failed\n"); } else rds_ib_stats_inc(s_ib_ack_sent); } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index a10fab6..17fa808 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -243,8 +243,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) struct rds_message *rm; rm = rds_send_get_message(conn, send->s_op); - if (rm) + if (rm) { + if (rm->m_rdma_op) + rds_ib_send_unmap_rdma(ic, rm->m_rdma_op); rds_ib_send_rdma_complete(rm, wc.status); + rds_message_put(rm); + } } oldest = (oldest + 1) % ic->i_send_ring.w_nr; @@ -482,6 +486,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, BUG_ON(off % RDS_FRAG_SIZE); BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); + /* Do not send cong updates to IB loopback */ + if (conn->c_loopback + && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { + rds_cong_map_updated(conn->c_fcong, ~(u64) 0); + return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + } + /* FIXME we may overallocate here */ if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) i = 1; @@ -574,8 +585,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); - } else if (ic->i_rm != rm) - BUG(); + } send = &ic->i_sends[pos]; first = send; @@ -714,8 +724,8 @@ add_header: ic->i_rm = prev->s_rm; prev->s_rm = NULL; } - /* Finesse this later */ - BUG(); + + rds_ib_conn_error(ic->conn, "ib_post_send failed\n"); goto out; } diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 394cf6b..6bc638f 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -156,9 +156,11 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data) case IB_EVENT_QP_REQ_ERR: case IB_EVENT_QP_FATAL: default: - rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n", + rdsdebug("Fatal QP Event %u " + "- connection %pI4->%pI4, reconnecting\n", event->event, &conn->c_laddr, &conn->c_faddr); + rds_conn_drop(conn); break; } } diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index 54af7d6..337e4e5 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -468,8 +468,8 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); rds_iw_stats_inc(s_iw_ack_send_failure); - /* Need to finesse this later. */ - BUG(); + + rds_iw_conn_error(ic->conn, "sending ack failed\n"); } else rds_iw_stats_inc(s_iw_ack_sent); } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 1379e9d..52182ff 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -616,8 +616,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); - } else if (ic->i_rm != rm) - BUG(); + } send = &ic->i_sends[pos]; first = send; diff --git a/net/rds/loop.c b/net/rds/loop.c index 4a61997..93a45f1 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -80,16 +80,9 @@ static int rds_loop_xmit_cong_map(struct rds_connection *conn, struct rds_cong_map *map, unsigned long offset) { - unsigned long i; - BUG_ON(offset); BUG_ON(map != conn->c_lcong); - for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { - memcpy((void *)conn->c_fcong->m_page_addrs[i], - (void *)map->m_page_addrs[i], PAGE_SIZE); - } - rds_cong_map_updated(conn->c_fcong, ~(u64) 0); return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4c64daa..61b359d 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -438,8 +438,10 @@ void rds_rdma_free_op(struct rds_rdma_op *ro) /* Mark page dirty if it was possibly modified, which * is the case for a RDMA_READ which copies from remote * to local memory */ - if (!ro->r_write) + if (!ro->r_write) { + BUG_ON(in_interrupt()); set_page_dirty(page); + } put_page(page); } diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 9ece910..5ea82fc 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -101,7 +101,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_DISCONNECTED: - printk(KERN_WARNING "RDS/RDMA: DISCONNECT event - dropping connection " + rdsdebug("DISCONNECT event - dropping connection " "%pI4->%pI4\n", &conn->c_laddr, &conn->c_faddr); rds_conn_drop(conn); @@ -109,8 +109,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, default: /* things like device disconnect? */ - printk(KERN_ERR "unknown event %u\n", event->event); - BUG(); + printk(KERN_ERR "RDS: unknown event %u!\n", event->event); break; } diff --git a/net/rds/rds.h b/net/rds/rds.h index 85d6f89..4bec6e2 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -388,6 +388,8 @@ struct rds_sock { /* flag indicating we were congested or not */ int rs_congested; + /* seen congestion (ENOBUFS) when sending? */ + int rs_seen_congestion; /* rs_lock protects all these adjacent members before the newline */ spinlock_t rs_lock; diff --git a/net/rds/send.c b/net/rds/send.c index b2fccfc..4629a0b 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -507,12 +507,13 @@ EXPORT_SYMBOL_GPL(rds_send_get_message); */ void rds_send_remove_from_sock(struct list_head *messages, int status) { - unsigned long flags = 0; /* silence gcc :P */ + unsigned long flags; struct rds_sock *rs = NULL; struct rds_message *rm; - local_irq_save(flags); while (!list_empty(messages)) { + int was_on_sock = 0; + rm = list_entry(messages->next, struct rds_message, m_conn_item); list_del_init(&rm->m_conn_item); @@ -527,20 +528,19 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) * while we're messing with it. It does not prevent the * message from being removed from the socket, though. */ - spin_lock(&rm->m_rs_lock); + spin_lock_irqsave(&rm->m_rs_lock, flags); if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) goto unlock_and_drop; if (rs != rm->m_rs) { if (rs) { - spin_unlock(&rs->rs_lock); rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } rs = rm->m_rs; - spin_lock(&rs->rs_lock); sock_hold(rds_rs_to_sk(rs)); } + spin_lock(&rs->rs_lock); if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { struct rds_rdma_op *ro = rm->m_rdma_op; @@ -557,21 +557,22 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) notifier->n_status = status; rm->m_rdma_op->r_notifier = NULL; } - rds_message_put(rm); + was_on_sock = 1; rm->m_rs = NULL; } + spin_unlock(&rs->rs_lock); unlock_and_drop: - spin_unlock(&rm->m_rs_lock); + spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); + if (was_on_sock) + rds_message_put(rm); } if (rs) { - spin_unlock(&rs->rs_lock); rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } - local_irq_restore(flags); } /* @@ -633,9 +634,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) list_move(&rm->m_sock_item, &list); rds_send_sndbuf_remove(rs, rm); clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); - - /* If this is a RDMA operation, notify the app. */ - __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); } /* order flag updates with the rs lock */ @@ -644,9 +642,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) spin_unlock_irqrestore(&rs->rs_lock, flags); - if (wake) - rds_wake_sk_sleep(rs); - conn = NULL; /* now remove the messages from the conn list as needed */ @@ -654,6 +649,10 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) /* We do this here rather than in the loop above, so that * we don't have to nest m_rs_lock under rs->rs_lock */ spin_lock_irqsave(&rm->m_rs_lock, flags2); + /* If this is a RDMA operation, notify the app. */ + spin_lock(&rs->rs_lock); + __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); + spin_unlock(&rs->rs_lock); rm->m_rs = NULL; spin_unlock_irqrestore(&rm->m_rs_lock, flags2); @@ -682,6 +681,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) if (conn) spin_unlock_irqrestore(&conn->c_lock, flags); + if (wake) + rds_wake_sk_sleep(rs); + while (!list_empty(&list)) { rm = list_entry(list.next, struct rds_message, m_sock_item); list_del_init(&rm->m_sock_item); @@ -815,7 +817,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, int ret = 0; int queued = 0, allocated_mr = 0; int nonblock = msg->msg_flags & MSG_DONTWAIT; - long timeo = sock_rcvtimeo(sk, nonblock); + long timeo = sock_sndtimeo(sk, nonblock); /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ @@ -894,8 +896,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, queue_delayed_work(rds_wq, &conn->c_conn_w, 0); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); - if (ret) + if (ret) { + rs->rs_seen_congestion = 1; goto out; + } while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, dport, &queued)) { diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index c00daff..40bfcf8 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -97,6 +97,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, goto out; } + rds_stats_add(s_copy_to_user, to_copy); size -= to_copy; ret += to_copy; skb_off += to_copy; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 34fdcc0..a28b895 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -240,7 +240,9 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); - queue_delayed_work(rds_wq, &conn->c_send_w, 0); + if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + out: read_unlock(&sk->sk_callback_lock); diff --git a/net/rds/threads.c b/net/rds/threads.c index 00fa10e..786c20e 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -259,7 +259,7 @@ void rds_threads_exit(void) int __init rds_threads_init(void) { - rds_wq = create_singlethread_workqueue("krdsd"); + rds_wq = create_workqueue("krdsd"); if (rds_wq == NULL) return -ENOMEM; diff --git a/net/rfkill/input.c b/net/rfkill/input.c index a7295ad..3713d7e 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -212,6 +212,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type, case KEY_WIMAX: rfkill_schedule_toggle(RFKILL_TYPE_WIMAX); break; + case KEY_RFKILL: + rfkill_schedule_toggle(RFKILL_TYPE_ALL); + break; } } else if (type == EV_SW && code == SW_RFKILL_ALL) rfkill_schedule_evsw_rfkillall(data); @@ -295,6 +298,11 @@ static const struct input_device_id rfkill_ids[] = { .keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) }, }, { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_RFKILL)] = BIT_MASK(KEY_RFKILL) }, + }, + { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT, .evbit = { BIT(EV_SW) }, .swbit = { [BIT_WORD(SW_RFKILL_ALL)] = BIT_MASK(SW_RFKILL_ALL) }, diff --git a/net/sctp/input.c b/net/sctp/input.c index c0c973e..3d74b26 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -75,7 +75,7 @@ static struct sctp_association *__sctp_lookup_association( const union sctp_addr *peer, struct sctp_transport **pt); -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb); +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. */ @@ -265,8 +265,13 @@ int sctp_rcv(struct sk_buff *skb) } if (sock_owned_by_user(sk)) { + if (sctp_add_backlog(sk, skb)) { + sctp_bh_unlock_sock(sk); + sctp_chunk_free(chunk); + skb = NULL; /* sctp_chunk_free already freed the skb */ + goto discard_release; + } SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); - sctp_add_backlog(sk, skb); } else { SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); @@ -336,8 +341,10 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk_add_backlog(sk, skb); - backloged = 1; + if (sk_add_backlog(sk, skb)) + sctp_chunk_free(chunk); + else + backloged = 1; } else sctp_inq_push(inqueue, chunk); @@ -362,22 +369,27 @@ done: return 0; } -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_ep_common *rcvr = chunk->rcvr; + int ret; - /* Hold the assoc/ep while hanging on the backlog queue. - * This way, we know structures we need will not disappear from us - */ - if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); - else if (SCTP_EP_TYPE_SOCKET == rcvr->type) - sctp_endpoint_hold(sctp_ep(rcvr)); - else - BUG(); + ret = sk_add_backlog(sk, skb); + if (!ret) { + /* Hold the assoc/ep while hanging on the backlog queue. + * This way, we know structures we need will not disappear + * from us + */ + if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) + sctp_association_hold(sctp_assoc(rcvr)); + else if (SCTP_EP_TYPE_SOCKET == rcvr->type) + sctp_endpoint_hold(sctp_ep(rcvr)); + else + BUG(); + } + return ret; - sk_add_backlog(sk, skb); } /* Handle icmp frag needed error. */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4e4ca65..500886b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -475,7 +475,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * used to provide an upper bound to this doubling operation. * * Special Case: the first HB doesn't trigger exponential backoff. - * The first unacknowleged HB triggers it. We do this with a flag + * The first unacknowledged HB triggers it. We do this with a flag * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f6d1e59..dfc5c12 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3720,6 +3720,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); percpu_counter_inc(&sctp_sockets_allocated); + /* Set socket backlog limit. */ + sk->sk_backlog.limit = sysctl_sctp_rmem[1]; + local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 7018eef..f96c2fe 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -160,16 +160,15 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) (void)rpc_ntop(sap, buf, sizeof(buf)); xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); /* netid */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7124129..75ab08e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -297,12 +297,11 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) switch (sap->sa_family) { case AF_INET: sin = xs_addr_in(xprt); - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: sin6 = xs_addr_in6(xprt); - (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); + snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); @@ -315,10 +314,10 @@ static void xs_format_common_peer_ports(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); char buf[128]; - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); } diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a3bfd40..90a0519 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -558,10 +558,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { - static int send_count = 0; - int bp_index; - int swap_time; /* Prepare buffer for broadcasting (if first time trying to send it) */ @@ -575,11 +572,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf, msg_set_mc_netid(msg, tipc_net_id); } - /* Determine if bearer pairs should be swapped following this attempt */ - - if ((swap_time = (++send_count >= 10))) - send_count = 0; - /* Send buffer over bearers until all targets reached */ bcbearer->remains = tipc_cltr_bcast_nodes; @@ -595,21 +587,22 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (bcbearer->remains_new.count == bcbearer->remains.count) continue; /* bearer pair doesn't add anything */ - if (!p->publ.blocked && - !p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { - if (swap_time && s && !s->publ.blocked) - goto swap; - else - goto update; + if (p->publ.blocked || + p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { + /* unable to send on primary bearer */ + if (!s || s->publ.blocked || + s->media->send_msg(buf, &s->publ, + &s->media->bcast_addr)) { + /* unable to send on either bearer */ + continue; + } + } + + if (s) { + bcbearer->bpairs[bp_index].primary = s; + bcbearer->bpairs[bp_index].secondary = p; } - if (!s || s->publ.blocked || - s->media->send_msg(buf, &s->publ, &s->media->bcast_addr)) - continue; /* unable to send using bearer pair */ -swap: - bcbearer->bpairs[bp_index].primary = s; - bcbearer->bpairs[bp_index].secondary = p; -update: if (bcbearer->remains_new.count == 0) return 0; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 327011f..7809137 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -45,10 +45,10 @@ #define MAX_ADDR_STR 32 -static struct media *media_list = NULL; +static struct media media_list[MAX_MEDIA]; static u32 media_count = 0; -struct bearer *tipc_bearers = NULL; +struct bearer tipc_bearers[MAX_BEARERS]; /** * media_name_valid - validate media name @@ -108,9 +108,11 @@ int tipc_register_media(u32 media_type, int res = -EINVAL; write_lock_bh(&tipc_net_lock); - if (!media_list) - goto exit; + if (tipc_mode != TIPC_NET_MODE) { + warn("Media <%s> rejected, not in networked mode yet\n", name); + goto exit; + } if (!media_name_valid(name)) { warn("Media <%s> rejected, illegal name\n", name); goto exit; @@ -660,33 +662,10 @@ int tipc_disable_bearer(const char *name) -int tipc_bearer_init(void) -{ - int res; - - write_lock_bh(&tipc_net_lock); - tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC); - media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC); - if (tipc_bearers && media_list) { - res = 0; - } else { - kfree(tipc_bearers); - kfree(media_list); - tipc_bearers = NULL; - media_list = NULL; - res = -ENOMEM; - } - write_unlock_bh(&tipc_net_lock); - return res; -} - void tipc_bearer_stop(void) { u32 i; - if (!tipc_bearers) - return; - for (i = 0; i < MAX_BEARERS; i++) { if (tipc_bearers[i].active) tipc_bearers[i].publ.blocked = 1; @@ -695,10 +674,6 @@ void tipc_bearer_stop(void) if (tipc_bearers[i].active) bearer_disable(tipc_bearers[i].publ.name); } - kfree(tipc_bearers); - kfree(media_list); - tipc_bearers = NULL; - media_list = NULL; media_count = 0; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index ca57348..000228e 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -114,7 +114,7 @@ struct bearer_name { struct link; -extern struct bearer *tipc_bearers; +extern struct bearer tipc_bearers[]; void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); diff --git a/net/tipc/link.c b/net/tipc/link.c index 6f50f64..49f2be8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1553,7 +1553,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) /* Continue retransmission now, if there is anything: */ - if (r_q_size && buf && !skb_cloned(buf)) { + if (r_q_size && buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { @@ -1722,15 +1722,16 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, dbg("Retransmitting %u in link %x\n", retransmits, l_ptr); if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { - if (!skb_cloned(buf)) { + if (l_ptr->retransm_queue_size == 0) { msg_dbg(msg, ">NO_RETR->BCONG>"); dbg_print_link(l_ptr, " "); l_ptr->retransm_queue_head = msg_seqno(msg); l_ptr->retransm_queue_size = retransmits; - return; } else { - /* Don't retransmit if driver already has the buffer */ + err("Unexpected retransmit on link %s (qsize=%d)\n", + l_ptr->name, l_ptr->retransm_queue_size); } + return; } else { /* Detect repeated retransmit failures on uncongested bearer */ @@ -1745,7 +1746,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, } } - while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) { + while (retransmits && (buf != l_ptr->next_out) && buf) { msg = buf_msg(buf); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); @@ -1882,6 +1883,15 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) (msg_destnode(msg) != tipc_own_addr))) goto cont; + /* Discard non-routeable messages destined for another node */ + + if (unlikely(!msg_isdata(msg) && + (msg_destnode(msg) != tipc_own_addr))) { + if ((msg_user(msg) != CONN_MANAGER) && + (msg_user(msg) != MSG_FRAGMENTER)) + goto cont; + } + /* Locate unicast link endpoint that should handle message */ n_ptr = tipc_node_find(msg_prevnode(msg)); diff --git a/net/tipc/net.c b/net/tipc/net.c index 7906608..f25b1cd 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -116,7 +116,8 @@ */ DEFINE_RWLOCK(tipc_net_lock); -struct network tipc_net = { NULL }; +struct _zone *tipc_zones[256] = { NULL, }; +struct network tipc_net = { tipc_zones }; struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref) { @@ -158,28 +159,12 @@ void tipc_net_send_external_routes(u32 dest) } } -static int net_init(void) -{ - memset(&tipc_net, 0, sizeof(tipc_net)); - tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC); - if (!tipc_net.zones) { - return -ENOMEM; - } - return 0; -} - static void net_stop(void) { u32 z_num; - if (!tipc_net.zones) - return; - - for (z_num = 1; z_num <= tipc_max_zones; z_num++) { + for (z_num = 1; z_num <= tipc_max_zones; z_num++) tipc_zone_delete(tipc_net.zones[z_num]); - } - kfree(tipc_net.zones); - tipc_net.zones = NULL; } static void net_route_named_msg(struct sk_buff *buf) @@ -282,9 +267,7 @@ int tipc_net_start(u32 addr) tipc_named_reinit(); tipc_port_reinit(); - if ((res = tipc_bearer_init()) || - (res = net_init()) || - (res = tipc_cltr_init()) || + if ((res = tipc_cltr_init()) || (res = tipc_bclink_init())) { return res; } diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 414fc34..8dea665 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -153,11 +153,11 @@ void tipc_ref_table_stop(void) u32 tipc_ref_acquire(void *object, spinlock_t **lock) { - struct reference *entry; u32 index; u32 index_mask; u32 next_plus_upper; u32 ref; + struct reference *entry = NULL; if (!object) { err("Attempt to acquire reference to non-existent object\n"); @@ -175,30 +175,36 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) index = tipc_ref_table.first_free; entry = &(tipc_ref_table.entries[index]); index_mask = tipc_ref_table.index_mask; - /* take lock in case a previous user of entry still holds it */ - spin_lock_bh(&entry->lock); next_plus_upper = entry->ref; tipc_ref_table.first_free = next_plus_upper & index_mask; ref = (next_plus_upper & ~index_mask) + index; - entry->ref = ref; - entry->object = object; - *lock = &entry->lock; } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { index = tipc_ref_table.init_point++; entry = &(tipc_ref_table.entries[index]); spin_lock_init(&entry->lock); - spin_lock_bh(&entry->lock); ref = tipc_ref_table.start_mask + index; - entry->ref = ref; - entry->object = object; - *lock = &entry->lock; } else { ref = 0; } write_unlock_bh(&ref_table_lock); + /* + * Grab the lock so no one else can modify this entry + * While we assign its ref value & object pointer + */ + if (entry) { + spin_lock_bh(&entry->lock); + entry->ref = ref; + entry->object = object; + *lock = &entry->lock; + /* + * keep it locked, the caller is responsible + * for unlocking this when they're done with it + */ + } + return ref; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1ea64f0..4b235fc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1322,8 +1322,10 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - sk_add_backlog(sk, buf); - res = TIPC_OK; + if (sk_add_backlog(sk, buf)) + res = TIPC_ERR_OVERLOAD; + else + res = TIPC_OK; } bh_unlock_sock(sk); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index ac91f0d..ff123e5 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -76,19 +76,6 @@ struct top_srv { static struct top_srv topsrv = { 0 }; /** - * htohl - convert value to endianness used by destination - * @in: value to convert - * @swap: non-zero if endianness must be reversed - * - * Returns converted value - */ - -static u32 htohl(u32 in, int swap) -{ - return swap ? swab32(in) : in; -} - -/** * subscr_send_event - send a message containing a tipc_event to the subscriber * * Note: Must not hold subscriber's server port lock, since tipc_send() will @@ -107,11 +94,11 @@ static void subscr_send_event(struct subscription *sub, msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); - sub->evt.found_lower = htohl(found_lower, sub->swap); - sub->evt.found_upper = htohl(found_upper, sub->swap); - sub->evt.port.ref = htohl(port_ref, sub->swap); - sub->evt.port.node = htohl(node, sub->swap); + sub->evt.event = htonl(event); + sub->evt.found_lower = htonl(found_lower); + sub->evt.found_upper = htonl(found_upper); + sub->evt.port.ref = htonl(port_ref); + sub->evt.port.node = htonl(node); tipc_send(sub->server_ref, 1, &msg_sect); } @@ -287,16 +274,23 @@ static void subscr_cancel(struct tipc_subscr *s, { struct subscription *sub; struct subscription *sub_temp; + __u32 type, lower, upper; int found = 0; /* Find first matching subscription, exit if not found */ + type = ntohl(s->seq.type); + lower = ntohl(s->seq.lower); + upper = ntohl(s->seq.upper); + list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { - if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - found = 1; - break; - } + if ((type == sub->seq.type) && + (lower == sub->seq.lower) && + (upper == sub->seq.upper)) { + found = 1; + break; + } } if (!found) return; @@ -325,16 +319,10 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, struct subscriber *subscriber) { struct subscription *sub; - int swap; - - /* Determine subscriber's endianness */ - - swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); + if (ntohl(s->filter) & TIPC_SUB_CANCEL) { subscr_cancel(s, subscriber); return NULL; } @@ -359,11 +347,11 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, /* Initialize subscription object */ - sub->seq.type = htohl(s->seq.type, swap); - sub->seq.lower = htohl(s->seq.lower, swap); - sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = htohl(s->timeout, swap); - sub->filter = htohl(s->filter, swap); + sub->seq.type = ntohl(s->seq.type); + sub->seq.lower = ntohl(s->seq.lower); + sub->seq.upper = ntohl(s->seq.upper); + sub->timeout = ntohl(s->timeout); + sub->filter = ntohl(s->filter); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || (sub->seq.lower > sub->seq.upper)) { @@ -376,7 +364,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); sub->server_ref = subscriber->port_ref; - sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); atomic_inc(&topsrv.subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 45d89bf..c20f496 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -53,7 +53,6 @@ typedef void (*tipc_subscr_event) (struct subscription *sub, * @nameseq_list: adjacent subscriptions in name sequence's subscription list * @subscription_list: adjacent subscriptions in subscriber's subscription list * @server_ref: object reference of server port associated with subscription - * @swap: indicates if subscriber uses opposite endianness in its messages * @evt: template for events generated by subscription */ @@ -66,7 +65,6 @@ struct subscription { struct list_head nameseq_list; struct list_head subscription_list; u32 server_ref; - int swap; struct tipc_event evt; }; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 3e1efe5..52e3042 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - sk_add_backlog(sk, skb); + queued = !sk_add_backlog(sk, skb); } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 34a5ef8..843e066 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1372,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1381,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); @@ -1486,7 +1487,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; |