diff options
Diffstat (limited to 'net')
86 files changed, 1004 insertions, 577 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 35b8911..fd05c81 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -39,6 +39,7 @@ #include <linux/inet.h> #include <linux/idr.h> #include <linux/file.h> +#include <linux/highmem.h> #include <linux/slab.h> #include <net/9p/9p.h> #include <linux/parser.h> @@ -325,7 +326,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, int count = nr_pages; while (nr_pages) { s = rest_of_page(data); - pages[index++] = virt_to_page(data); + pages[index++] = kmap_to_page(data); data += s; nr_pages--; } diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index f49da58..350bf62 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -14,49 +14,45 @@ static ssize_t show_type(struct device *cdev, struct device_attribute *attr, char *buf) { struct atm_dev *adev = to_atm_dev(cdev); - return sprintf(buf, "%s\n", adev->type); + + return scnprintf(buf, PAGE_SIZE, "%s\n", adev->type); } static ssize_t show_address(struct device *cdev, struct device_attribute *attr, char *buf) { - char *pos = buf; struct atm_dev *adev = to_atm_dev(cdev); - int i; - - for (i = 0; i < (ESI_LEN - 1); i++) - pos += sprintf(pos, "%02x:", adev->esi[i]); - pos += sprintf(pos, "%02x\n", adev->esi[i]); - return pos - buf; + return scnprintf(buf, PAGE_SIZE, "%pM\n", adev->esi); } static ssize_t show_atmaddress(struct device *cdev, struct device_attribute *attr, char *buf) { unsigned long flags; - char *pos = buf; struct atm_dev *adev = to_atm_dev(cdev); struct atm_dev_addr *aaddr; int bin[] = { 1, 2, 10, 6, 1 }, *fmt = bin; - int i, j; + int i, j, count = 0; spin_lock_irqsave(&adev->lock, flags); list_for_each_entry(aaddr, &adev->local, entry) { for (i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) { if (j == *fmt) { - pos += sprintf(pos, "."); + count += scnprintf(buf + count, + PAGE_SIZE - count, "."); ++fmt; j = 0; } - pos += sprintf(pos, "%02x", - aaddr->addr.sas_addr.prv[i]); + count += scnprintf(buf + count, + PAGE_SIZE - count, "%02x", + aaddr->addr.sas_addr.prv[i]); } - pos += sprintf(pos, "\n"); + count += scnprintf(buf + count, PAGE_SIZE - count, "\n"); } spin_unlock_irqrestore(&adev->lock, flags); - return pos - buf; + return count; } static ssize_t show_atmindex(struct device *cdev, @@ -64,25 +60,21 @@ static ssize_t show_atmindex(struct device *cdev, { struct atm_dev *adev = to_atm_dev(cdev); - return sprintf(buf, "%d\n", adev->number); + return scnprintf(buf, PAGE_SIZE, "%d\n", adev->number); } static ssize_t show_carrier(struct device *cdev, struct device_attribute *attr, char *buf) { - char *pos = buf; struct atm_dev *adev = to_atm_dev(cdev); - pos += sprintf(pos, "%d\n", - adev->signal == ATM_PHY_SIG_LOST ? 0 : 1); - - return pos - buf; + return scnprintf(buf, PAGE_SIZE, "%d\n", + adev->signal == ATM_PHY_SIG_LOST ? 0 : 1); } static ssize_t show_link_rate(struct device *cdev, struct device_attribute *attr, char *buf) { - char *pos = buf; struct atm_dev *adev = to_atm_dev(cdev); int link_rate; @@ -100,9 +92,7 @@ static ssize_t show_link_rate(struct device *cdev, default: link_rate = adev->link_rate * 8 * 53; } - pos += sprintf(pos, "%d\n", link_rate); - - return pos - buf; + return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate); } static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 9f3925a..7d02ebd 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -123,7 +123,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv) unsigned int msecs; msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER; - msecs += (random32() % 2 * BATADV_JITTER); + msecs += random32() % (2 * BATADV_JITTER); return jiffies + msecs_to_jiffies(msecs); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 1c8fdc3..37fe693 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -366,11 +366,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err = netdev_set_master(dev, br->dev); if (err) - goto err3; + goto err4; err = netdev_rx_handler_register(dev, br_handle_frame, p); if (err) - goto err4; + goto err5; dev->priv_flags |= IFF_BRIDGE_PORT; @@ -402,8 +402,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; -err4: +err5: netdev_set_master(dev, NULL); +err4: + br_netpoll_disable(p); err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 6f0a2ee..acc9f4c 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -83,9 +83,12 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (port) { struct br_mdb_entry e; e.ifindex = port->dev->ifindex; - e.addr.u.ip4 = p->addr.u.ip4; + e.state = p->state; + if (p->addr.proto == htons(ETH_P_IP)) + e.addr.u.ip4 = p->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - e.addr.u.ip6 = p->addr.u.ip6; + if (p->addr.proto == htons(ETH_P_IPV6)) + e.addr.u.ip6 = p->addr.u.ip6; #endif e.addr.proto = p->addr.proto; if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { @@ -253,6 +256,8 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry) #endif } else return false; + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) + return false; return true; } @@ -310,7 +315,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, } static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) + struct br_ip *group, unsigned char state) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -336,7 +341,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, break; } - p = br_multicast_new_port_group(port, group, *pp); + p = br_multicast_new_port_group(port, group, *pp, state); if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); @@ -373,7 +378,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, #endif spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip); + ret = br_mdb_add_group(br, p, &ip, entry->state); spin_unlock_bh(&br->multicast_lock); return ret; } @@ -479,3 +484,10 @@ void br_mdb_init(void) rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL); } + +void br_mdb_uninit(void) +{ + rtnl_unregister(PF_BRIDGE, RTM_GETMDB); + rtnl_unregister(PF_BRIDGE, RTM_NEWMDB); + rtnl_unregister(PF_BRIDGE, RTM_DELMDB); +} diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1093c89..6d6f265 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -279,7 +279,7 @@ static void br_multicast_port_group_expired(unsigned long data) spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&pg->timer) || - hlist_unhashed(&pg->mglist)) + hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT) goto out; br_multicast_del_pg(br, pg); @@ -622,7 +622,8 @@ out: struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group __rcu *next) + struct net_bridge_port_group __rcu *next, + unsigned char state) { struct net_bridge_port_group *p; @@ -632,6 +633,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->addr = *group; p->port = port; + p->state = state; rcu_assign_pointer(p->next, next); hlist_add_head(&p->mglist, &port->mglist); setup_timer(&p->timer, br_multicast_port_group_expired, @@ -674,7 +676,7 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = br_multicast_new_port_group(port, group, *pp); + p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); @@ -1165,7 +1167,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (max_delay) group = &mld->mld_mca; } else if (skb->len >= sizeof(*mld2q)) { - u16 mrc; if (!pskb_may_pull(skb, sizeof(*mld2q))) { err = -EINVAL; goto out; @@ -1173,8 +1174,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; - mrc = ntohs(mld2q->mld2q_mrc); - max_delay = mrc ? MLDV2_MRC(mrc) : 1; + max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; } if (!group) @@ -1608,7 +1608,6 @@ void br_multicast_init(struct net_bridge *br) br_multicast_querier_expired, (unsigned long)br); setup_timer(&br->multicast_query_timer, br_multicast_query_expired, (unsigned long)br); - br_mdb_init(); } void br_multicast_open(struct net_bridge *br) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index dead9df..5dc66ab 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -299,11 +299,21 @@ struct rtnl_link_ops br_link_ops __read_mostly = { int __init br_netlink_init(void) { - return rtnl_link_register(&br_link_ops); + int err; + + br_mdb_init(); + err = rtnl_link_register(&br_link_ops); + if (err) + goto out; + + return 0; +out: + br_mdb_uninit(); + return err; } void __exit br_netlink_fini(void) { + br_mdb_uninit(); rtnl_link_unregister(&br_link_ops); - rtnl_unregister_all(PF_BRIDGE); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f21a739..711094a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -83,6 +83,7 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct br_ip addr; + unsigned char state; }; struct net_bridge_mdb_entry @@ -443,8 +444,10 @@ extern void br_multicast_free_pg(struct rcu_head *head); extern struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group *next); + struct net_bridge_port_group *next, + unsigned char state); extern void br_mdb_init(void); +extern void br_mdb_uninit(void); extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type); @@ -523,6 +526,12 @@ static inline bool br_multicast_is_router(struct net_bridge *br) { return 0; } +static inline void br_mdb_init(void) +{ +} +static inline void br_mdb_uninit(void) +{ +} #endif /* br_netfilter.c */ diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a802029..ee71ea2 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name, /* start with defaults */ opt->flags = CEPH_OPT_DEFAULT; - opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ @@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name, /* misc */ case Opt_osdtimeout: - opt->osd_timeout = intval; + pr_warning("ignoring deprecated osdtimeout option\n"); break; case Opt_osdkeepalivetimeout: opt->osd_keepalive_timeout = intval; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3ef1759..5ccf87e 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -506,6 +506,7 @@ static void reset_connection(struct ceph_connection *con) { /* reset connection, out_queue, msg_ and connect_seq */ /* discard existing out_queue and msg_seq */ + dout("reset_connection %p\n", con); ceph_msg_remove_list(&con->out_queue); ceph_msg_remove_list(&con->out_sent); @@ -561,7 +562,7 @@ void ceph_con_open(struct ceph_connection *con, mutex_lock(&con->mutex); dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); - BUG_ON(con->state != CON_STATE_CLOSED); + WARN_ON(con->state != CON_STATE_CLOSED); con->state = CON_STATE_PREOPEN; con->peer_name.type = (__u8) entity_type; @@ -1506,13 +1507,6 @@ static int process_banner(struct ceph_connection *con) return 0; } -static void fail_protocol(struct ceph_connection *con) -{ - reset_connection(con); - BUG_ON(con->state != CON_STATE_NEGOTIATING); - con->state = CON_STATE_CLOSED; -} - static int process_connect(struct ceph_connection *con) { u64 sup_feat = con->msgr->supported_features; @@ -1530,7 +1524,7 @@ static int process_connect(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), sup_feat, server_feat, server_feat & ~sup_feat); con->error_msg = "missing required protocol features"; - fail_protocol(con); + reset_connection(con); return -1; case CEPH_MSGR_TAG_BADPROTOVER: @@ -1541,7 +1535,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->out_connect.protocol_version), le32_to_cpu(con->in_reply.protocol_version)); con->error_msg = "protocol version mismatch"; - fail_protocol(con); + reset_connection(con); return -1; case CEPH_MSGR_TAG_BADAUTHORIZER: @@ -1631,11 +1625,11 @@ static int process_connect(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), req_feat, server_feat, req_feat & ~server_feat); con->error_msg = "missing required protocol features"; - fail_protocol(con); + reset_connection(con); return -1; } - BUG_ON(con->state != CON_STATE_NEGOTIATING); + WARN_ON(con->state != CON_STATE_NEGOTIATING); con->state = CON_STATE_OPEN; con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); @@ -2132,7 +2126,6 @@ more: if (ret < 0) goto out; - BUG_ON(con->state != CON_STATE_CONNECTING); con->state = CON_STATE_NEGOTIATING; /* @@ -2160,7 +2153,7 @@ more: goto more; } - BUG_ON(con->state != CON_STATE_OPEN); + WARN_ON(con->state != CON_STATE_OPEN); if (con->in_base_pos < 0) { /* @@ -2244,22 +2237,62 @@ bad_tag: /* - * Atomically queue work on a connection. Bump @con reference to - * avoid races with connection teardown. + * Atomically queue work on a connection after the specified delay. + * Bump @con reference to avoid races with connection teardown. + * Returns 0 if work was queued, or an error code otherwise. */ -static void queue_con(struct ceph_connection *con) +static int queue_con_delay(struct ceph_connection *con, unsigned long delay) { if (!con->ops->get(con)) { - dout("queue_con %p ref count 0\n", con); - return; + dout("%s %p ref count 0\n", __func__, con); + + return -ENOENT; } - if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { - dout("queue_con %p - already queued\n", con); + if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { + dout("%s %p - already queued\n", __func__, con); con->ops->put(con); - } else { - dout("queue_con %p\n", con); + + return -EBUSY; + } + + dout("%s %p %lu\n", __func__, con, delay); + + return 0; +} + +static void queue_con(struct ceph_connection *con) +{ + (void) queue_con_delay(con, 0); +} + +static bool con_sock_closed(struct ceph_connection *con) +{ + if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) + return false; + +#define CASE(x) \ + case CON_STATE_ ## x: \ + con->error_msg = "socket closed (con state " #x ")"; \ + break; + + switch (con->state) { + CASE(CLOSED); + CASE(PREOPEN); + CASE(CONNECTING); + CASE(NEGOTIATING); + CASE(OPEN); + CASE(STANDBY); + default: + pr_warning("%s con %p unrecognized state %lu\n", + __func__, con, con->state); + con->error_msg = "unrecognized con state"; + BUG(); + break; } +#undef CASE + + return true; } /* @@ -2273,35 +2306,16 @@ static void con_work(struct work_struct *work) mutex_lock(&con->mutex); restart: - if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) { - switch (con->state) { - case CON_STATE_CONNECTING: - con->error_msg = "connection failed"; - break; - case CON_STATE_NEGOTIATING: - con->error_msg = "negotiation failed"; - break; - case CON_STATE_OPEN: - con->error_msg = "socket closed"; - break; - default: - dout("unrecognized con state %d\n", (int)con->state); - con->error_msg = "unrecognized con state"; - BUG(); - } + if (con_sock_closed(con)) goto fault; - } if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { dout("con_work %p backing off\n", con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("con_work %p backoff %lu\n", con, con->delay); - mutex_unlock(&con->mutex); - return; - } else { + ret = queue_con_delay(con, round_jiffies_relative(con->delay)); + if (ret) { dout("con_work %p FAILED to back off %lu\n", con, con->delay); + BUG_ON(ret == -ENOENT); set_bit(CON_FLAG_BACKOFF, &con->flags); } goto done; @@ -2356,12 +2370,12 @@ fault: static void ceph_fault(struct ceph_connection *con) __releases(con->mutex) { - pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), + pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); - BUG_ON(con->state != CON_STATE_CONNECTING && + WARN_ON(con->state != CON_STATE_CONNECTING && con->state != CON_STATE_NEGOTIATING && con->state != CON_STATE_OPEN); @@ -2398,24 +2412,8 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - con->ops->get(con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("fault queued %p delay %lu\n", con, con->delay); - } else { - con->ops->put(con); - dout("fault failed to queue %p delay %lu, backoff\n", - con, con->delay); - /* - * In many cases we see a socket state change - * while con_work is running and end up - * queuing (non-delayed) work, such that we - * can't backoff with a delay. Set a flag so - * that when con_work restarts we schedule the - * delay then. - */ - set_bit(CON_FLAG_BACKOFF, &con->flags); - } + set_bit(CON_FLAG_BACKOFF, &con->flags); + queue_con(con); } out_unlock: diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index c1d756c..eb9a444 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -221,6 +221,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); + RB_CLEAR_NODE(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); @@ -580,7 +581,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, dout("__kick_osd_requests osd%d\n", osd->o_osd); err = __reset_osd(osdc, osd); - if (err == -EAGAIN) + if (err) return; list_for_each_entry(req, &osd->o_requests, r_osd_item) { @@ -607,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, } } -static void kick_osd_requests(struct ceph_osd_client *osdc, - struct ceph_osd *kickosd) -{ - mutex_lock(&osdc->request_mutex); - __kick_osd_requests(osdc, kickosd); - mutex_unlock(&osdc->request_mutex); -} - /* * If the osd connection drops, we need to resubmit all requests. */ @@ -628,7 +621,9 @@ static void osd_reset(struct ceph_connection *con) dout("osd_reset osd%d\n", osd->o_osd); osdc = osd->o_osdc; down_read(&osdc->map_sem); - kick_osd_requests(osdc, osd); + mutex_lock(&osdc->request_mutex); + __kick_osd_requests(osdc, osd); + mutex_unlock(&osdc->request_mutex); send_queued(osdc); up_read(&osdc->map_sem); } @@ -647,6 +642,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) atomic_set(&osd->o_ref, 1); osd->o_osdc = osdc; osd->o_osd = onum; + RB_CLEAR_NODE(&osd->o_node); INIT_LIST_HEAD(&osd->o_requests); INIT_LIST_HEAD(&osd->o_linger_requests); INIT_LIST_HEAD(&osd->o_osd_lru); @@ -750,6 +746,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); + ret = -ENODEV; } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], &osd->o_con.peer_addr, sizeof(osd->o_con.peer_addr)) == 0 && @@ -876,9 +873,9 @@ static void __unregister_request(struct ceph_osd_client *osdc, req->r_osd = NULL; } + list_del_init(&req->r_req_lru_item); ceph_osdc_put_request(req); - list_del_init(&req->r_req_lru_item); if (osdc->num_requests == 0) { dout(" no requests, canceling timeout\n"); __cancel_osd_timeout(osdc); @@ -910,8 +907,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { dout("__unregister_linger_request %p\n", req); + list_del_init(&req->r_linger_item); if (req->r_osd) { - list_del_init(&req->r_linger_item); list_del_init(&req->r_linger_osd); if (list_empty(&req->r_osd->o_requests) && @@ -1090,12 +1087,10 @@ static void handle_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, timeout_work.work); - struct ceph_osd_request *req, *last_req = NULL; + struct ceph_osd_request *req; struct ceph_osd *osd; - unsigned long timeout = osdc->client->options->osd_timeout * HZ; unsigned long keepalive = osdc->client->options->osd_keepalive_timeout * HZ; - unsigned long last_stamp = 0; struct list_head slow_osds; dout("timeout\n"); down_read(&osdc->map_sem); @@ -1105,37 +1100,6 @@ static void handle_timeout(struct work_struct *work) mutex_lock(&osdc->request_mutex); /* - * reset osds that appear to be _really_ unresponsive. this - * is a failsafe measure.. we really shouldn't be getting to - * this point if the system is working properly. the monitors - * should mark the osd as failed and we should find out about - * it from an updated osd map. - */ - while (timeout && !list_empty(&osdc->req_lru)) { - req = list_entry(osdc->req_lru.next, struct ceph_osd_request, - r_req_lru_item); - - /* hasn't been long enough since we sent it? */ - if (time_before(jiffies, req->r_stamp + timeout)) - break; - - /* hasn't been long enough since it was acked? */ - if (req->r_request->ack_stamp == 0 || - time_before(jiffies, req->r_request->ack_stamp + timeout)) - break; - - BUG_ON(req == last_req && req->r_stamp == last_stamp); - last_req = req; - last_stamp = req->r_stamp; - - osd = req->r_osd; - BUG_ON(!osd); - pr_warning(" tid %llu timed out on osd%d, will reset osd\n", - req->r_tid, osd->o_osd); - __kick_osd_requests(osdc, osd); - } - - /* * ping osds that are a bit slow. this ensures that if there * is a break in the TCP connection we will notice, and reopen * a connection with that osd (from the fault callback). @@ -1306,7 +1270,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) * Requeue requests whose mapping to an OSD has changed. If requests map to * no osd, request a new map. * - * Caller should hold map_sem for read and request_mutex. + * Caller should hold map_sem for read. */ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) { @@ -1320,6 +1284,24 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) for (p = rb_first(&osdc->requests); p; ) { req = rb_entry(p, struct ceph_osd_request, r_node); p = rb_next(p); + + /* + * For linger requests that have not yet been + * registered, move them to the linger list; they'll + * be sent to the osd in the loop below. Unregister + * the request before re-registering it as a linger + * request to ensure the __map_request() below + * will decide it needs to be sent. + */ + if (req->r_linger && list_empty(&req->r_linger_item)) { + dout("%p tid %llu restart on osd%d\n", + req, req->r_tid, + req->r_osd ? req->r_osd->o_osd : -1); + __unregister_request(osdc, req); + __register_linger_request(osdc, req); + continue; + } + err = __map_request(osdc, req, force_resend); if (err < 0) continue; /* error */ @@ -1334,17 +1316,6 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) req->r_flags |= CEPH_OSD_FLAG_RETRY; } } - if (req->r_linger && list_empty(&req->r_linger_item)) { - /* - * register as a linger so that we will - * re-submit below and get a new tid - */ - dout("%p tid %llu restart on osd%d\n", - req, req->r_tid, - req->r_osd ? req->r_osd->o_osd : -1); - __register_linger_request(osdc, req); - __unregister_request(osdc, req); - } } list_for_each_entry_safe(req, nreq, &osdc->req_linger, @@ -1352,6 +1323,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); err = __map_request(osdc, req, force_resend); + dout("__map_request returned %d\n", err); if (err == 0) continue; /* no change and no osd was specified */ if (err < 0) @@ -1364,8 +1336,8 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); - __unregister_linger_request(osdc, req); __register_request(osdc, req); + __unregister_linger_request(osdc, req); } mutex_unlock(&osdc->request_mutex); @@ -1373,6 +1345,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("%d requests for down osds, need new map\n", needmap); ceph_monc_request_next_osdmap(&osdc->client->monc); } + reset_changed_osds(osdc); } @@ -1429,7 +1402,6 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) osdc->osdmap = newmap; } kick_requests(osdc, 0); - reset_changed_osds(osdc); } else { dout("ignoring incremental map %u len %d\n", epoch, maplen); @@ -1599,6 +1571,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); + RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ init_completion(&event->completion); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 5433fb0..de73214 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -469,6 +469,22 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) return NULL; } +const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id) +{ + struct ceph_pg_pool_info *pi; + + if (id == CEPH_NOPOOL) + return NULL; + + if (WARN_ON_ONCE(id > (u64) INT_MAX)) + return NULL; + + pi = __lookup_pg_pool(&map->pg_pools, (int) id); + + return pi ? pi->name : NULL; +} +EXPORT_SYMBOL(ceph_pg_pool_name_by_id); + int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) { struct rb_node *rbp; @@ -645,10 +661,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, max, bad); while (max--) { ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); + err = -ENOMEM; pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) goto bad; pi->id = ceph_decode_32(p); + err = -EINVAL; ev = ceph_decode_8(p); /* encoding version */ if (ev > CEPH_PG_POOL_VERSION) { pr_warning("got unknown v %d > %d of ceph_pg_pool\n", @@ -664,8 +682,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) __insert_pg_pool(&map->pg_pools, pi); } - if (version >= 5 && __decode_pool_names(p, end, map) < 0) - goto bad; + if (version >= 5) { + err = __decode_pool_names(p, end, map); + if (err < 0) { + dout("fail to decode pool names"); + goto bad; + } + } ceph_decode_32_safe(p, end, map->pool_max, bad); @@ -745,7 +768,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) return map; bad: - dout("osdmap_decode fail\n"); + dout("osdmap_decode fail err %d\n", err); ceph_osdmap_destroy(map); return ERR_PTR(err); } @@ -839,6 +862,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, if (ev > CEPH_PG_POOL_VERSION) { pr_warning("got unknown v %d > %d of ceph_pg_pool\n", ev, CEPH_PG_POOL_VERSION); + err = -EINVAL; goto bad; } pi = __lookup_pg_pool(&map->pg_pools, pool); @@ -855,8 +879,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, if (err < 0) goto bad; } - if (version >= 5 && __decode_pool_names(p, end, map) < 0) - goto bad; + if (version >= 5) { + err = __decode_pool_names(p, end, map); + if (err < 0) + goto bad; + } /* old_pool */ ceph_decode_32_safe(p, end, len, bad); @@ -932,15 +959,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, (void) __remove_pg_mapping(&map->pg_temp, pgid); /* insert */ - if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) { - err = -EINVAL; + err = -EINVAL; + if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) goto bad; - } + err = -ENOMEM; pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); - if (!pg) { - err = -ENOMEM; + if (!pg) goto bad; - } pg->pgid = pgid; pg->len = pglen; for (j = 0; j < pglen; j++) diff --git a/net/core/dev.c b/net/core/dev.c index d0cbc93..f64e439b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -203,7 +203,7 @@ static struct list_head offload_base __read_mostly; DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); -DEFINE_SEQLOCK(devnet_rename_seq); +seqcount_t devnet_rename_seq; static inline void dev_base_seq_inc(struct net *net) { @@ -1093,10 +1093,10 @@ int dev_change_name(struct net_device *dev, const char *newname) if (dev->flags & IFF_UP) return -EBUSY; - write_seqlock(&devnet_rename_seq); + write_seqcount_begin(&devnet_rename_seq); if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { - write_sequnlock(&devnet_rename_seq); + write_seqcount_end(&devnet_rename_seq); return 0; } @@ -1104,7 +1104,7 @@ int dev_change_name(struct net_device *dev, const char *newname) err = dev_get_valid_name(net, dev, newname); if (err < 0) { - write_sequnlock(&devnet_rename_seq); + write_seqcount_end(&devnet_rename_seq); return err; } @@ -1112,11 +1112,11 @@ rollback: ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); - write_sequnlock(&devnet_rename_seq); + write_seqcount_end(&devnet_rename_seq); return ret; } - write_sequnlock(&devnet_rename_seq); + write_seqcount_end(&devnet_rename_seq); write_lock_bh(&dev_base_lock); hlist_del_rcu(&dev->name_hlist); @@ -1135,7 +1135,7 @@ rollback: /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err = ret; - write_seqlock(&devnet_rename_seq); + write_seqcount_begin(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); goto rollback; } else { @@ -4180,7 +4180,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) return -EFAULT; retry: - seq = read_seqbegin(&devnet_rename_seq); + seq = read_seqcount_begin(&devnet_rename_seq); rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); if (!dev) { @@ -4190,7 +4190,7 @@ retry: strcpy(ifr.ifr_name, dev->name); rcu_read_unlock(); - if (read_seqretry(&devnet_rename_seq, seq)) + if (read_seqcount_retry(&devnet_rename_seq, seq)) goto retry; if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) @@ -6121,6 +6121,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) static const struct ethtool_ops default_ethtool_ops; +void netdev_set_default_ethtool_ops(struct net_device *dev, + const struct ethtool_ops *ops) +{ + if (dev->ethtool_ops == &default_ethtool_ops) + dev->ethtool_ops = ops; +} +EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 334efd5..28c5f5a 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1334,7 +1334,6 @@ struct kobj_ns_type_operations net_ns_type_operations = { }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -#ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { struct net_device *dev = to_net_dev(d); @@ -1353,7 +1352,6 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) exit: return retval; } -#endif /* * netdev_release -- destroy and free a dead device. @@ -1382,9 +1380,7 @@ static struct class net_class = { #ifdef CONFIG_SYSFS .dev_attrs = net_class_attributes, #endif /* CONFIG_SYSFS */ -#ifdef CONFIG_HOTPLUG .dev_uevent = netdev_uevent, -#endif .ns_type = &net_ns_type_operations, .namespace = net_namespace, }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2e9a313..8acce01 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -649,7 +649,8 @@ static int netns_install(struct nsproxy *nsproxy, void *ns) { struct net *net = ns; - if (!ns_capable(net->user_ns, CAP_SYS_ADMIN)) + if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || + !nsown_capable(CAP_SYS_ADMIN)) return -EPERM; put_net(nsproxy->net_ns); diff --git a/net/core/sock.c b/net/core/sock.c index a692ef4..bc131d4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -583,7 +583,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, goto out; retry: - seq = read_seqbegin(&devnet_rename_seq); + seq = read_seqcount_begin(&devnet_rename_seq); rcu_read_lock(); dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); ret = -ENODEV; @@ -594,7 +594,7 @@ retry: strcpy(devname, dev->name); rcu_read_unlock(); - if (read_seqretry(&devnet_rename_seq, seq)) + if (read_seqcount_retry(&devnet_rename_seq, seq)) goto retry; len = strlen(devname) + 1; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 176ecdb..4f9f5eb 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -439,8 +439,8 @@ exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: - bh_unlock_sock(newsk); - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); goto exit; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 56840b2..6e05981 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -585,7 +585,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newinet->inet_rcv_saddr = LOOPBACK4_IPV6; if (__inet_inherit_port(sk, newsk) < 0) { - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); goto out; } __inet6_hash(newsk, NULL); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ce6fbdf..9547a273 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -321,7 +321,7 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) { __be32 saddr = 0; - u8 *dst_ha = NULL; + u8 dst_ha[MAX_ADDR_LEN], *dst_hw = NULL; struct net_device *dev = neigh->dev; __be32 target = *(__be32 *)neigh->primary_key; int probes = atomic_read(&neigh->probes); @@ -363,8 +363,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) if (probes < 0) { if (!(neigh->nud_state & NUD_VALID)) pr_debug("trying to ucast probe in NUD_INVALID\n"); - dst_ha = neigh->ha; - read_lock_bh(&neigh->lock); + neigh_ha_snapshot(dst_ha, neigh, dev); + dst_hw = dst_ha; } else { probes -= neigh->parms->app_probes; if (probes < 0) { @@ -376,9 +376,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) } arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, - dst_ha, dev->dev_addr, NULL); - if (dst_ha) - read_unlock_bh(&neigh->lock); + dst_hw, dev->dev_addr, NULL); } static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cc06a47..a8e4f26 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -823,9 +823,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) { ret = -ENOBUFS; ifa = inet_alloc_ifa(); - INIT_HLIST_NODE(&ifa->hash); if (!ifa) break; + INIT_HLIST_NODE(&ifa->hash); if (colon) memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); else diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2026542..d0670f0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -710,6 +710,22 @@ void inet_csk_destroy_sock(struct sock *sk) } EXPORT_SYMBOL(inet_csk_destroy_sock); +/* This function allows to force a closure of a socket after the call to + * tcp/dccp_create_openreq_child(). + */ +void inet_csk_prepare_forced_close(struct sock *sk) +{ + /* sk_clone_lock locked the socket and set refcnt to 2 */ + bh_unlock_sock(sk); + sock_put(sk); + + /* The below has to be done to allow calling inet_csk_destroy_sock */ + sock_set_flag(sk, SOCK_DEAD); + percpu_counter_inc(sk->sk_prot->orphan_count); + inet_sk(sk)->inet_num = 0; +} +EXPORT_SYMBOL(inet_csk_prepare_forced_close); + int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a85ae2f..303012a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -750,6 +750,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev int gre_hlen; __be32 dst; int mtu; + u8 ttl; if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) @@ -760,7 +761,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; - tiph = (const struct iphdr *)skb->data; + if (skb->protocol == htons(ETH_P_IP)) + tiph = (const struct iphdr *)skb->data; + else + tiph = &tunnel->parms.iph; } else { gre_hlen = tunnel->hlen; tiph = &tunnel->parms.iph; @@ -812,6 +816,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } + ttl = tiph->ttl; tos = tiph->tos; if (tos == 1) { tos = 0; @@ -904,11 +909,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev dev_kfree_skb(skb); skb = new_skb; old_iph = ip_hdr(skb); + /* Warning : tiph value might point to freed memory */ } - skb_reset_transport_header(skb); skb_push(skb, gre_hlen); skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(*iph)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); @@ -927,8 +933,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; + iph->ttl = ttl; - if ((iph->ttl = tiph->ttl) == 0) { + if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) iph->ttl = old_iph->ttl; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3c9d208..d9c4f11 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -590,7 +590,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_TTL: if (optlen < 1) goto e_inval; - if (val != -1 && (val < 0 || val > 255)) + if (val != -1 && (val < 1 || val > 255)) goto e_inval; inet->uc_ttl = val; break; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index d763701..a2e50ae 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -136,6 +136,8 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ +__be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ + __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ @@ -558,6 +560,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (ic_myaddr == NONE) ic_myaddr = tip; ic_servaddr = sip; + ic_addrservaddr = sip; ic_got_reply = IC_RARP; drop_unlock: @@ -1068,7 +1071,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str ic_servaddr = server_id; #ifdef IPCONFIG_DEBUG printk("DHCP: Offered address %pI4 by server %pI4\n", - &ic_myaddr, &ic_servaddr); + &ic_myaddr, &b->iph.saddr); #endif /* The DHCP indicated server address takes * precedence over the bootp header one if @@ -1113,6 +1116,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str ic_dev = dev; ic_myaddr = b->your_ip; ic_servaddr = b->server_ip; + ic_addrservaddr = b->iph.saddr; if (ic_gateway == NONE && b->relay_ip) ic_gateway = b->relay_ip; if (ic_nameservers[0] == NONE) @@ -1268,7 +1272,7 @@ static int __init ic_dynamic(void) printk("IP-Config: Got %s answer from %pI4, ", ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - &ic_servaddr); + &ic_addrservaddr); pr_cont("my address is %pI4\n", &ic_myaddr); return 0; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 51f13f8..04b18c1 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -81,6 +81,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); memset(tcph, 0, sizeof(*tcph)); tcph->source = oth->dest; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index da2c8a3..eeaff7e 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -124,23 +124,28 @@ nf_nat_ipv4_fn(unsigned int hooknum, ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; - } else + } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; + } break; default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_DROP; - } + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; } return nf_nat_packet(ct, ctinfo, hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; } static unsigned int diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ca2536..2aa69c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1428,12 +1428,12 @@ static void tcp_service_net_dma(struct sock *sk, bool wait) } #endif -static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) +static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; u32 offset; - skb_queue_walk(&sk->sk_receive_queue, skb) { + while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { offset = seq - TCP_SKB_CB(skb)->seq; if (tcp_hdr(skb)->syn) offset--; @@ -1441,6 +1441,11 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) *off = offset; return skb; } + /* This looks weird, but this can happen if TCP collapsing + * splitted a fat GRO packet, while we released socket lock + * in skb_splice_bits() + */ + sk_eat_skb(sk, skb, false); } return NULL; } @@ -1482,7 +1487,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, break; } used = recv_actor(desc, skb, offset, len); - if (used < 0) { + if (used <= 0) { if (!copied) copied = used; break; @@ -1520,8 +1525,10 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ - if (copied > 0) + if (copied > 0) { + tcp_recv_skb(sk, seq, &offset); tcp_cleanup_rbuf(sk, copied); + } return copied; } EXPORT_SYMBOL(tcp_read_sock); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a136925..18f97ca 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5543,6 +5543,9 @@ slow_path: if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) goto csum_error; + if (!th->ack && !th->rst) + goto discard; + /* * Standard slow path. */ @@ -5551,7 +5554,7 @@ slow_path: return 0; step5: - if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) goto discard; /* ts_recent update must be made after we are sure that the packet @@ -5984,11 +5987,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (tcp_check_req(sk, skb, req, NULL, true) == NULL) goto discard; } + + if (!th->ack && !th->rst) + goto discard; + if (!tcp_validate_incoming(sk, skb, th, 0)) return 0; /* step 5: check the ACK field */ - if (th->ack) { + if (true) { int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; switch (sk->sk_state) { @@ -6138,8 +6145,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } break; } - } else - goto discard; + } /* ts_recent update must be made after we are sure that the packet * is in window. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1ed2307..54139fa 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1767,10 +1767,8 @@ exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: - tcp_clear_xmit_timers(newsk); - tcp_cleanup_congestion_control(newsk); - bh_unlock_sock(newsk); - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); goto exit; } EXPORT_SYMBOL(tcp_v4_syn_recv_sock); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2068ac4..4ea2448 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -41,6 +41,6 @@ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-y += addrconf_core.o exthdrs_core.o -obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6_offload) +obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6fca01f..420e563 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -154,6 +154,11 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event); static int addrconf_ifdown(struct net_device *dev, int how); +static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, + int plen, + const struct net_device *dev, + u32 flags, u32 noflags); + static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_timer(unsigned long data); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); @@ -250,12 +255,6 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev) return !qdisc_tx_is_noop(dev); } -/* Check if a route is valid prefix route */ -static inline int addrconf_is_prefix_route(const struct rt6_info *rt) -{ - return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0; -} - static void addrconf_del_timer(struct inet6_ifaddr *ifp) { if (del_timer(&ifp->timer)) @@ -534,8 +533,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC); return; errout: - if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); } static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { @@ -942,17 +940,15 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { struct in6_addr prefix; struct rt6_info *rt; - struct net *net = dev_net(ifp->idev->dev); - struct flowi6 fl6 = {}; ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - fl6.flowi6_oif = ifp->idev->dev->ifindex; - fl6.daddr = prefix; - rt = (struct rt6_info *)ip6_route_lookup(net, &fl6, - RT6_LOOKUP_F_IFACE); - if (rt != net->ipv6.ip6_null_entry && - addrconf_is_prefix_route(rt)) { + rt = addrconf_get_prefix_route(&prefix, + ifp->prefix_len, + ifp->idev->dev, + 0, RTF_GATEWAY | RTF_DEFAULT); + + if (rt) { if (onlink == 0) { ip6_del_rt(rt); rt = NULL; @@ -1878,7 +1874,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, continue; if ((rt->rt6i_flags & flags) != flags) continue; - if ((noflags != 0) && ((rt->rt6i_flags & flags) != 0)) + if ((rt->rt6i_flags & noflags) != 0) continue; dst_hold(&rt->dst); break; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 867466c..c727e47 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -758,8 +758,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_dst_set_noref(skb, dst); } - skb->transport_header = skb->network_header; - proto = NEXTHDR_GRE; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); @@ -768,6 +766,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_push(skb, gre_hlen); skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(*ipv6h)); /* * Push down and install the IP header. diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f2a007b..6574175 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1314,6 +1314,12 @@ out: static void ndisc_redirect_rcv(struct sk_buff *skb) { + u8 *hdr; + struct ndisc_options ndopts; + struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); + u32 ndoptlen = skb->tail - (skb->transport_header + + offsetof(struct rd_msg, opt)); + #ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: @@ -1330,6 +1336,17 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } + if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) + return; + + if (!ndopts.nd_opts_rh) + return; + + hdr = (u8 *)ndopts.nd_opts_rh; + hdr += 8; + if (!pskb_pull(skb, hdr - skb_transport_header(skb))) + return; + icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index e948691..7302b0b 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -14,42 +14,23 @@ #include <linux/netfilter_ipv6/ip6t_NPT.h> #include <linux/netfilter/x_tables.h> -static __sum16 csum16_complement(__sum16 a) -{ - return (__force __sum16)(0xffff - (__force u16)a); -} - -static __sum16 csum16_add(__sum16 a, __sum16 b) -{ - u16 sum; - - sum = (__force u16)a + (__force u16)b; - sum += (__force u16)a < (__force u16)b; - return (__force __sum16)sum; -} - -static __sum16 csum16_sub(__sum16 a, __sum16 b) -{ - return csum16_add(a, csum16_complement(b)); -} - static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; - __sum16 src_sum = 0, dst_sum = 0; + __wsum src_sum = 0, dst_sum = 0; unsigned int i; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { - src_sum = csum16_add(src_sum, - (__force __sum16)npt->src_pfx.in6.s6_addr16[i]); - dst_sum = csum16_add(dst_sum, - (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]); + src_sum = csum_add(src_sum, + (__force __wsum)npt->src_pfx.in6.s6_addr16[i]); + dst_sum = csum_add(dst_sum, + (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]); } - npt->adjustment = csum16_sub(src_sum, dst_sum); + npt->adjustment = (__force __sum16) csum_sub(src_sum, dst_sum); return 0; } @@ -85,7 +66,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, return false; } - sum = csum16_add((__force __sum16)addr->s6_addr16[idx], + sum = (__force __sum16) csum_add((__force __wsum)addr->s6_addr16[idx], npt->adjustment); if (sum == CSUM_MANGLED_0) sum = 0; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index fd4fb34..029623d 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -132,6 +132,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) ip6h->saddr = oip6h->daddr; ip6h->daddr = oip6h->saddr; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 6c8ae24..e0e788d 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -127,23 +127,28 @@ nf_nat_ipv6_fn(unsigned int hooknum, ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; - } else + } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; + } break; default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_DROP; - } + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; } return nf_nat_packet(ct, ctinfo, hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; } static unsigned int diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 00ee17c..137e245 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -81,8 +81,8 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); /* - * (protoff == skb->len) mean that the packet doesn't have no data - * except of IPv6 & ext headers. but it's tracked anyway. - YK + * (protoff == skb->len) means the packet has not data, just + * IPv6 and possibly extensions headers, but it is tracked anyway */ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 22c8ea9..3dacecc 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -311,7 +311,10 @@ found: else fq->q.fragments = skb; - skb->dev = NULL; + if (skb->dev) { + fq->iif = skb->dev->ifindex; + skb->dev = NULL; + } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; if (payload_len > fq->q.max_size) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6565cf5..93825dd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1288,7 +1288,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #endif if (__inet_inherit_port(sk, newsk) < 0) { - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); goto out; } __inet6_hash(newsk, NULL); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 3ad1f9d..df08250 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1806,7 +1806,7 @@ static void iucv_external_interrupt(struct ext_code ext_code, struct iucv_irq_data *p; struct iucv_irq_list *work; - kstat_cpu(smp_processor_id()).irqs[EXTINT_IUC]++; + inc_irq_stat(IRQEXT_IUC); p = iucv_irq_data[smp_processor_id()]; if (p->ippathid >= iucv_max_pathid) { WARN_ON(p->ippathid >= iucv_max_pathid); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5c61677..47e0aca 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1009,6 +1009,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + sta_info_flush(local, vlan); sta_info_flush(local, sdata); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 53f0312..80e5552 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -4,6 +4,7 @@ #include <linux/nl80211.h> #include <linux/export.h> +#include <linux/rtnetlink.h> #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -197,6 +198,15 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) ctx = container_of(conf, struct ieee80211_chanctx, conf); + if (sdata->vif.type == NL80211_IFTYPE_AP) { + struct ieee80211_sub_if_data *vlan; + + /* for the VLAN list */ + ASSERT_RTNL(); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + rcu_assign_pointer(vlan->vif.chanctx_conf, NULL); + } + ieee80211_unassign_vif_chanctx(sdata, ctx); if (ctx->refcount == 0) ieee80211_free_chanctx(local, ctx); @@ -316,6 +326,15 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, goto out; } + if (sdata->vif.type == NL80211_IFTYPE_AP) { + struct ieee80211_sub_if_data *vlan; + + /* for the VLAN list */ + ASSERT_RTNL(); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + rcu_assign_pointer(vlan->vif.chanctx_conf, &ctx->conf); + } + ieee80211_recalc_smps_chanctx(local, ctx); out: mutex_unlock(&local->chanctx_mtx); @@ -331,6 +350,25 @@ void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->local->chanctx_mtx); } +void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *ap; + struct ieee80211_chanctx_conf *conf; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->bss)) + return; + + ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); + + mutex_lock(&local->chanctx_mtx); + + conf = rcu_dereference_protected(ap->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + rcu_assign_pointer(sdata->vif.chanctx_conf, conf); + mutex_unlock(&local->chanctx_mtx); +} + void ieee80211_iter_chan_contexts_atomic( struct ieee80211_hw *hw, void (*iter)(struct ieee80211_hw *hw, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 8881fc7..6b7644e 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -703,8 +703,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) sdata_info(sdata, "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n"); - ieee80211_request_internal_scan(sdata, - ifibss->ssid, ifibss->ssid_len, NULL); + ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, + NULL); } static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) @@ -802,9 +802,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) IEEE80211_SCAN_INTERVAL)) { sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); - ieee80211_request_internal_scan(sdata, - ifibss->ssid, ifibss->ssid_len, - ifibss->fixed_channel ? ifibss->channel : NULL); + ieee80211_request_ibss_scan(sdata, ifibss->ssid, + ifibss->ssid_len, chan); } else { int interval = IEEE80211_SCAN_INTERVAL; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 42d0d02..8563b9a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -92,8 +92,6 @@ struct ieee80211_bss { u32 device_ts; - u8 dtim_period; - bool wmm_used; bool uapsd_supported; @@ -140,7 +138,6 @@ enum ieee80211_bss_corrupt_data_flags { /** * enum ieee80211_valid_data_flags - BSS valid data flags - * @IEEE80211_BSS_VALID_DTIM: DTIM data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE * @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE @@ -151,7 +148,6 @@ enum ieee80211_bss_corrupt_data_flags { * beacon/probe response. */ enum ieee80211_bss_valid_data_flags { - IEEE80211_BSS_VALID_DTIM = BIT(0), IEEE80211_BSS_VALID_WMM = BIT(1), IEEE80211_BSS_VALID_RATES = BIT(2), IEEE80211_BSS_VALID_ERP = BIT(3) @@ -440,6 +436,7 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off powersave */ + u8 dtim_period; enum ieee80211_smps_mode req_smps, /* requested smps mode */ driver_smps_mode; /* smps mode request */ @@ -773,6 +770,10 @@ struct ieee80211_sub_if_data { u32 mntr_flags; } u; + spinlock_t cleanup_stations_lock; + struct list_head cleanup_stations; + struct work_struct cleanup_stations_wk; + #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *dir; @@ -1329,9 +1330,9 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* scan/BSS handling */ void ieee80211_scan_work(struct work_struct *work); -int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, - const u8 *ssid, u8 ssid_len, - struct ieee80211_channel *chan); +int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, + const u8 *ssid, u8 ssid_len, + struct ieee80211_channel *chan); int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req); void ieee80211_scan_cancel(struct ieee80211_local *local); @@ -1628,6 +1629,7 @@ ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode mode); void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); +void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 09a80b5..8be854e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -207,17 +207,8 @@ void ieee80211_recalc_idle(struct ieee80211_local *local) static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) { - int meshhdrlen; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - meshhdrlen = (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ? 5 : 0; - - /* FIX: what would be proper limits for MTU? - * This interface uses 802.3 frames. */ - if (new_mtu < 256 || - new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) { + if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN) return -EINVAL; - } dev->mtu = new_mtu; return 0; @@ -586,11 +577,13 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: - /* no need to tell driver, but set carrier */ - if (rtnl_dereference(sdata->bss->beacon)) + /* no need to tell driver, but set carrier and chanctx */ + if (rtnl_dereference(sdata->bss->beacon)) { + ieee80211_vif_vlan_copy_chanctx(sdata); netif_carrier_on(dev); - else + } else { netif_carrier_off(dev); + } break; case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { @@ -839,6 +832,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: list_del(&sdata->u.vlan.list); + rcu_assign_pointer(sdata->vif.chanctx_conf, NULL); /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: @@ -865,20 +859,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->work); /* * When we get here, the interface is marked down. - * Call rcu_barrier() to wait both for the RX path + * Call synchronize_rcu() to wait for the RX path * should it be using the interface and enqueuing - * frames at this very time on another CPU, and - * for the sta free call_rcu callbacks. - */ - rcu_barrier(); - - /* - * free_sta_rcu() enqueues a work for the actual - * sta cleanup, so we need to flush it while - * sdata is still valid. + * frames at this very time on another CPU. */ - flush_workqueue(local->workqueue); - + synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); /* @@ -1498,6 +1483,15 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); } +static void ieee80211_cleanup_sdata_stas_wk(struct work_struct *wk) +{ + struct ieee80211_sub_if_data *sdata; + + sdata = container_of(wk, struct ieee80211_sub_if_data, cleanup_stations_wk); + + ieee80211_cleanup_sdata_stas(sdata); +} + int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) @@ -1573,6 +1567,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_LIST_HEAD(&sdata->key_list); + spin_lock_init(&sdata->cleanup_stations_lock); + INIT_LIST_HEAD(&sdata->cleanup_stations); + INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk); + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[i]; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 1bf03f9..649ad51 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -163,7 +163,7 @@ int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) return -ENOMEM; sdata->u.mesh.rmc->idx_mask = RMC_BUCKETS - 1; for (i = 0; i < RMC_BUCKETS; i++) - INIT_LIST_HEAD(&sdata->u.mesh.rmc->bucket[i].list); + INIT_LIST_HEAD(&sdata->u.mesh.rmc->bucket[i]); return 0; } @@ -177,7 +177,7 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) return; for (i = 0; i < RMC_BUCKETS; i++) - list_for_each_entry_safe(p, n, &rmc->bucket[i].list, list) { + list_for_each_entry_safe(p, n, &rmc->bucket[i], list) { list_del(&p->list); kmem_cache_free(rm_cache, p); } @@ -210,7 +210,7 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, /* Don't care about endianness since only match matters */ memcpy(&seqnum, &mesh_hdr->seqnum, sizeof(mesh_hdr->seqnum)); idx = le32_to_cpu(mesh_hdr->seqnum) & rmc->idx_mask; - list_for_each_entry_safe(p, n, &rmc->bucket[idx].list, list) { + list_for_each_entry_safe(p, n, &rmc->bucket[idx], list) { ++entries; if (time_after(jiffies, p->exp_time) || (entries == RMC_QUEUE_MAX_LEN)) { @@ -229,7 +229,7 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, p->seqnum = seqnum; p->exp_time = jiffies + RMC_TIMEOUT; memcpy(p->sa, sa, ETH_ALEN); - list_add(&p->list, &rmc->bucket[idx].list); + list_add(&p->list, &rmc->bucket[idx]); return 0; } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 7c9215f..84c28c6 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -184,7 +184,7 @@ struct rmc_entry { }; struct mesh_rmc { - struct rmc_entry bucket[RMC_BUCKETS]; + struct list_head bucket[RMC_BUCKETS]; u32 idx_mask; }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7753a9c..a355292 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1074,12 +1074,8 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) if (beaconint_us > latency) { local->ps_sdata = NULL; } else { - struct ieee80211_bss *bss; int maxslp = 1; - u8 dtimper; - - bss = (void *)found->u.mgd.associated->priv; - dtimper = bss->dtim_period; + u8 dtimper = found->u.mgd.dtim_period; /* If the TIM IE is invalid, pretend the value is 1 */ if (!dtimper) @@ -1410,10 +1406,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) - bss_conf->dtim_period = bss->dtim_period; - else + if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) { + /* + * If the AP is buggy we may get here with no DTIM period + * known, so assume it's 1 which is the only safe assumption + * in that case, although if the TIM IE is broken powersave + * probably just won't work at all. + */ + bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1; + } else { bss_conf->dtim_period = 0; + } bss_conf->assoc = 1; @@ -1562,6 +1565,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.timers_running = 0; + sdata->vif.bss_conf.dtim_period = 0; + ifmgd->flags = 0; ieee80211_vif_release_channel(sdata); } @@ -2373,11 +2378,18 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; bool need_ps = false; - if (sdata->u.mgd.associated && - ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) { - bss = (void *)sdata->u.mgd.associated->priv; + if ((sdata->u.mgd.associated && + ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || + (sdata->u.mgd.assoc_data && + ether_addr_equal(mgmt->bssid, + sdata->u.mgd.assoc_data->bss->bssid))) { /* not previously set so we may need to recalc */ - need_ps = !bss->dtim_period; + need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period; + + if (elems->tim && !elems->parse_error) { + struct ieee80211_tim_ie *tim_ie = elems->tim; + sdata->u.mgd.dtim_period = tim_ie->dtim_period; + } } if (elems->ds_params && elems->ds_params_len == 1) @@ -3896,20 +3908,41 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, /* kick off associate process */ ifmgd->assoc_data = assoc_data; + ifmgd->dtim_period = 0; err = ieee80211_prep_connection(sdata, req->bss, true); if (err) goto err_clear; - if (!bss->dtim_period && - sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) { - /* - * Wait up to one beacon interval ... - * should this be more if we miss one? - */ - sdata_info(sdata, "waiting for beacon from %pM\n", - ifmgd->bssid); - assoc_data->timeout = TU_TO_EXP_TIME(req->bss->beacon_interval); + if (sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) { + const struct cfg80211_bss_ies *beacon_ies; + + rcu_read_lock(); + beacon_ies = rcu_dereference(req->bss->beacon_ies); + if (!beacon_ies) { + /* + * Wait up to one beacon interval ... + * should this be more if we miss one? + */ + sdata_info(sdata, "waiting for beacon from %pM\n", + ifmgd->bssid); + assoc_data->timeout = + TU_TO_EXP_TIME(req->bss->beacon_interval); + } else { + const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, + beacon_ies->data, + beacon_ies->len); + if (tim_ie && tim_ie[1] >= + sizeof(struct ieee80211_tim_ie)) { + const struct ieee80211_tim_ie *tim; + tim = (void *)(tim_ie + 2); + ifmgd->dtim_period = tim->dtim_period; + } + assoc_data->have_beacon = true; + assoc_data->sent_assoc = false; + assoc_data->timeout = jiffies; + } + rcu_read_unlock(); } else { assoc_data->have_beacon = true; assoc_data->sent_assoc = false; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 8ed83dc..d59fc68 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -113,18 +113,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->valid_data |= IEEE80211_BSS_VALID_ERP; } - if (elems->tim && (!elems->parse_error || - !(bss->valid_data & IEEE80211_BSS_VALID_DTIM))) { - struct ieee80211_tim_ie *tim_ie = elems->tim; - bss->dtim_period = tim_ie->dtim_period; - if (!elems->parse_error) - bss->valid_data |= IEEE80211_BSS_VALID_DTIM; - } - - /* If the beacon had no TIM IE, or it was invalid, use 1 */ - if (beacon && !bss->dtim_period) - bss->dtim_period = 1; - /* replace old supported rates if we get new values */ if (!elems->parse_error || !(bss->valid_data & IEEE80211_BSS_VALID_RATES)) { @@ -832,9 +820,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, return res; } -int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, - const u8 *ssid, u8 ssid_len, - struct ieee80211_channel *chan) +int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, + const u8 *ssid, u8 ssid_len, + struct ieee80211_channel *chan) { struct ieee80211_local *local = sdata->local; int ret = -EBUSY; @@ -848,22 +836,36 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, /* fill internal scan request */ if (!chan) { - int i, nchan = 0; + int i, max_n; + int n_ch = 0; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!local->hw.wiphy->bands[band]) continue; - for (i = 0; - i < local->hw.wiphy->bands[band]->n_channels; - i++) { - local->int_scan_req->channels[nchan] = + + max_n = local->hw.wiphy->bands[band]->n_channels; + for (i = 0; i < max_n; i++) { + struct ieee80211_channel *tmp_ch = &local->hw.wiphy->bands[band]->channels[i]; - nchan++; + + if (tmp_ch->flags & (IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_DISABLED)) + continue; + + local->int_scan_req->channels[n_ch] = tmp_ch; + n_ch++; } } - local->int_scan_req->n_channels = nchan; + if (WARN_ON_ONCE(n_ch == 0)) + goto unlock; + + local->int_scan_req->n_channels = n_ch; } else { + if (WARN_ON_ONCE(chan->flags & (IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_DISABLED))) + goto unlock; + local->int_scan_req->channels[0] = chan; local->int_scan_req->n_channels = 1; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f3e5025..ca9fde1 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -91,9 +91,8 @@ static int sta_info_hash_del(struct ieee80211_local *local, return -ENOENT; } -static void free_sta_work(struct work_struct *wk) +static void cleanup_single_sta(struct sta_info *sta) { - struct sta_info *sta = container_of(wk, struct sta_info, free_sta_wk); int ac, i; struct tid_ampdu_tx *tid_tx; struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -153,11 +152,35 @@ static void free_sta_work(struct work_struct *wk) sta_info_free(local, sta); } +void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata) +{ + struct sta_info *sta; + + spin_lock_bh(&sdata->cleanup_stations_lock); + while (!list_empty(&sdata->cleanup_stations)) { + sta = list_first_entry(&sdata->cleanup_stations, + struct sta_info, list); + list_del(&sta->list); + spin_unlock_bh(&sdata->cleanup_stations_lock); + + cleanup_single_sta(sta); + + spin_lock_bh(&sdata->cleanup_stations_lock); + } + + spin_unlock_bh(&sdata->cleanup_stations_lock); +} + static void free_sta_rcu(struct rcu_head *h) { struct sta_info *sta = container_of(h, struct sta_info, rcu_head); + struct ieee80211_sub_if_data *sdata = sta->sdata; - ieee80211_queue_work(&sta->local->hw, &sta->free_sta_wk); + spin_lock(&sdata->cleanup_stations_lock); + list_add_tail(&sta->list, &sdata->cleanup_stations); + spin_unlock(&sdata->cleanup_stations_lock); + + ieee80211_queue_work(&sdata->local->hw, &sdata->cleanup_stations_wk); } /* protected by RCU */ @@ -310,7 +333,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); - INIT_WORK(&sta->free_sta_wk, free_sta_work); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -862,7 +884,7 @@ void sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { - del_timer(&local->sta_cleanup); + del_timer_sync(&local->sta_cleanup); sta_info_flush(local, NULL); } @@ -891,6 +913,20 @@ int sta_info_flush(struct ieee80211_local *local, } mutex_unlock(&local->sta_mtx); + rcu_barrier(); + + if (sdata) { + ieee80211_cleanup_sdata_stas(sdata); + cancel_work_sync(&sdata->cleanup_stations_wk); + } else { + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + ieee80211_cleanup_sdata_stas(sdata); + cancel_work_sync(&sdata->cleanup_stations_wk); + } + mutex_unlock(&local->iflist_mtx); + } + return ret; } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 1489bca..37c1889 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -299,7 +299,6 @@ struct sta_info { spinlock_t lock; struct work_struct drv_unblock_wk; - struct work_struct free_sta_wk; u16 listen_interval; @@ -563,4 +562,6 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); +void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata); + #endif /* STA_INFO_H */ diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index e748aed..b7c7f81 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -224,9 +224,9 @@ void ieee802154_free_device(struct ieee802154_dev *hw) BUG_ON(!list_empty(&priv->slaves)); - wpan_phy_free(priv->phy); - mutex_destroy(&priv->slaves_mtx); + + wpan_phy_free(priv->phy); } EXPORT_SYMBOL(ieee802154_free_device); diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 1191039..199b922 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -389,7 +389,7 @@ void mac802154_wpan_setup(struct net_device *dev) static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) { - return netif_rx(skb); + return netif_rx_ni(skb); } static int diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index fefa514..49e96df 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -680,6 +680,13 @@ config NETFILTER_XT_TARGET_NFQUEUE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_NOTRACK + tristate '"NOTRACK" target support (DEPRECATED)' + depends on NF_CONNTRACK + depends on IP_NF_RAW || IP6_NF_RAW + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_CT + config NETFILTER_XT_TARGET_RATEEST tristate '"RATEEST" target support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 08cdc71..016d95e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1526,6 +1526,7 @@ err_extend: */ #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) #define DYING_NULLS_VAL ((1<<30)+1) +#define TEMPLATE_NULLS_VAL ((1<<30)+2) static int nf_conntrack_init_net(struct net *net) { @@ -1534,6 +1535,7 @@ static int nf_conntrack_init_net(struct net *net) atomic_set(&net->ct.count, 0); INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); + INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) { ret = -ENOMEM; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4e078cd..627b0e5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2624,7 +2624,7 @@ ctnetlink_create_expect(struct net *net, u16 zone, if (!help) { if (!cda[CTA_EXPECT_TIMEOUT]) { err = -EINVAL; - goto out; + goto err_out; } exp->timeout.expires = jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9f199f2..92fd8ec 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -13,6 +13,7 @@ */ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/if_arp.h> #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> @@ -384,6 +385,7 @@ __build_packet_message(struct nfulnl_instance *inst, struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; + const unsigned char *hwhdrp; nlh = nlmsg_put(inst->skb, 0, 0, NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, @@ -485,9 +487,17 @@ __build_packet_message(struct nfulnl_instance *inst, if (indev && skb_mac_header_was_set(skb)) { if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || nla_put_be16(inst->skb, NFULA_HWLEN, - htons(skb->dev->hard_header_len)) || - nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, - skb_mac_header(skb))) + htons(skb->dev->hard_header_len))) + goto nla_put_failure; + + hwhdrp = skb_mac_header(skb); + + if (skb->dev->type == ARPHRD_SIT) + hwhdrp -= ETH_HLEN; + + if (hwhdrp >= skb->head && + nla_put(inst->skb, NFULA_HWHEADER, + skb->dev->hard_header_len, hwhdrp)) goto nla_put_failure; } diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index ae7f5da..2a08430 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -149,6 +149,10 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) __set_bit(IPS_TEMPLATE_BIT, &ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Overload tuple linked list to put us in template list. */ + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &par->net->ct.tmpl); out: info->ct = ct; return 0; @@ -289,6 +293,10 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) __set_bit(IPS_TEMPLATE_BIT, &ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Overload tuple linked list to put us in template list. */ + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &par->net->ct.tmpl); out: info->ct = ct; return 0; @@ -377,14 +385,60 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { }, }; +static unsigned int +notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + /* Previously seen (loopback)? Ignore. */ + if (skb->nfct != NULL) + return XT_CONTINUE; + + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + + return XT_CONTINUE; +} + +static int notrack_chk(const struct xt_tgchk_param *par) +{ + if (!par->net->xt.notrack_deprecated_warning) { + pr_info("netfilter: NOTRACK target is deprecated, " + "use CT instead or upgrade iptables\n"); + par->net->xt.notrack_deprecated_warning = true; + } + return 0; +} + +static struct xt_target notrack_tg_reg __read_mostly = { + .name = "NOTRACK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = notrack_chk, + .target = notrack_tg, + .table = "raw", + .me = THIS_MODULE, +}; + static int __init xt_ct_tg_init(void) { - return xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); + int ret; + + ret = xt_register_target(¬rack_tg_reg); + if (ret < 0) + return ret; + + ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); + if (ret < 0) { + xt_unregister_target(¬rack_tg_reg); + return ret; + } + return 0; } static void __exit xt_ct_tg_exit(void) { xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); + xt_unregister_target(¬rack_tg_reg); } module_init(xt_ct_tg_init); @@ -394,3 +448,5 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: connection tracking target"); MODULE_ALIAS("ipt_CT"); MODULE_ALIAS("ip6t_CT"); +MODULE_ALIAS("ipt_NOTRACK"); +MODULE_ALIAS("ip6t_NOTRACK"); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 26a668a..a9d7af9 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -157,11 +157,22 @@ dsthash_find(const struct xt_hashlimit_htable *ht, /* allocate dsthash_ent, initialize dst, put in htable and lock it */ static struct dsthash_ent * dsthash_alloc_init(struct xt_hashlimit_htable *ht, - const struct dsthash_dst *dst) + const struct dsthash_dst *dst, bool *race) { struct dsthash_ent *ent; spin_lock(&ht->lock); + + /* Two or more packets may race to create the same entry in the + * hashtable, double check if this packet lost race. + */ + ent = dsthash_find(ht, dst); + if (ent != NULL) { + spin_unlock(&ht->lock); + *race = true; + return ent; + } + /* initialize hash with random val at the time we allocate * the first hashtable entry */ if (unlikely(!ht->rnd_initialized)) { @@ -318,7 +329,10 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo) parent = hashlimit_net->ipt_hashlimit; else parent = hashlimit_net->ip6t_hashlimit; - remove_proc_entry(hinfo->pde->name, parent); + + if(parent != NULL) + remove_proc_entry(hinfo->pde->name, parent); + htable_selective_cleanup(hinfo, select_all); vfree(hinfo); } @@ -585,6 +599,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; + bool race = false; u32 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) @@ -593,13 +608,18 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) rcu_read_lock_bh(); dh = dsthash_find(hinfo, &dst); if (dh == NULL) { - dh = dsthash_alloc_init(hinfo, &dst); + dh = dsthash_alloc_init(hinfo, &dst, &race); if (dh == NULL) { rcu_read_unlock_bh(); goto hotdrop; + } else if (race) { + /* Already got an entry, update expiration timeout */ + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_recalc(dh, now, hinfo->cfg.mode); + } else { + dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_init(dh, hinfo); } - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_init(dh, hinfo); } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); @@ -856,6 +876,27 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { + struct xt_hashlimit_htable *hinfo; + struct hlist_node *pos; + struct proc_dir_entry *pde; + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + /* recent_net_exit() is called before recent_mt_destroy(). Make sure + * that the parent xt_recent proc entry is is empty before trying to + * remove it. + */ + mutex_lock(&hashlimit_mutex); + pde = hashlimit_net->ipt_hashlimit; + if (pde == NULL) + pde = hashlimit_net->ip6t_hashlimit; + + hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) + remove_proc_entry(hinfo->pde->name, pde); + + hashlimit_net->ipt_hashlimit = NULL; + hashlimit_net->ip6t_hashlimit = NULL; + mutex_unlock(&hashlimit_mutex); + proc_net_remove(net, "ipt_hashlimit"); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) proc_net_remove(net, "ip6t_hashlimit"); @@ -872,9 +913,6 @@ static int __net_init hashlimit_net_init(struct net *net) static void __net_exit hashlimit_net_exit(struct net *net) { - struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); - - BUG_ON(!hlist_empty(&hashlimit_net->htables)); hashlimit_proc_net_exit(net); } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 4635c9b..978efc9 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -29,6 +29,7 @@ #include <linux/skbuff.h> #include <linux/inet.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -310,6 +311,14 @@ out: return ret; } +static void recent_table_free(void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + static int recent_mt_check(const struct xt_mtchk_param *par, const struct xt_recent_mtinfo_v1 *info) { @@ -322,6 +331,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, #endif unsigned int i; int ret = -EINVAL; + size_t sz; if (unlikely(!hash_rnd_inited)) { get_random_bytes(&hash_rnd, sizeof(hash_rnd)); @@ -360,8 +370,11 @@ static int recent_mt_check(const struct xt_mtchk_param *par, goto out; } - t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size, - GFP_KERNEL); + sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size; + if (sz <= PAGE_SIZE) + t = kzalloc(sz, GFP_KERNEL); + else + t = vzalloc(sz); if (t == NULL) { ret = -ENOMEM; goto out; @@ -377,14 +390,14 @@ static int recent_mt_check(const struct xt_mtchk_param *par, uid = make_kuid(&init_user_ns, ip_list_uid); gid = make_kgid(&init_user_ns, ip_list_gid); if (!uid_valid(uid) || !gid_valid(gid)) { - kfree(t); + recent_table_free(t); ret = -EINVAL; goto out; } pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_fops, t); if (pde == NULL) { - kfree(t); + recent_table_free(t); ret = -ENOMEM; goto out; } @@ -431,10 +444,11 @@ static void recent_mt_destroy(const struct xt_mtdtor_param *par) list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS - remove_proc_entry(t->name, recent_net->xt_recent); + if (recent_net->xt_recent != NULL) + remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); - kfree(t); + recent_table_free(t); } mutex_unlock(&recent_mutex); } @@ -615,6 +629,20 @@ static int __net_init recent_proc_net_init(struct net *net) static void __net_exit recent_proc_net_exit(struct net *net) { + struct recent_net *recent_net = recent_pernet(net); + struct recent_table *t; + + /* recent_net_exit() is called before recent_mt_destroy(). Make sure + * that the parent xt_recent proc entry is is empty before trying to + * remove it. + */ + spin_lock_bh(&recent_lock); + list_for_each_entry(t, &recent_net->tables, list) + remove_proc_entry(t->name, recent_net->xt_recent); + + recent_net->xt_recent = NULL; + spin_unlock_bh(&recent_lock); + proc_net_remove(net, "xt_recent"); } #else @@ -638,9 +666,6 @@ static int __net_init recent_net_init(struct net *net) static void __net_exit recent_net_exit(struct net *net) { - struct recent_net *recent_net = recent_pernet(net); - - BUG_ON(!list_empty(&recent_net->tables)); recent_proc_net_exit(net); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c8a1eb6..c0353d5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -669,6 +669,9 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; + if (addr_len < sizeof(struct sockaddr_nl)) + return -EINVAL; + if (nladdr->nl_family != AF_NETLINK) return -EINVAL; @@ -2059,7 +2062,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) struct sock *s = v; struct netlink_sock *nlk = nlk_sk(s); - seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", + seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", s, s->sk_protocol, nlk->portid, diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index a1e1162..31b74f5 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -434,12 +434,11 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event) version = RDS_PROTOCOL_3_0; while ((common >>= 1) != 0) version++; - } - printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using " - "incompatible protocol version %u.%u\n", - &dp->dp_saddr, - dp->dp_protocol_major, - dp->dp_protocol_minor); + } else + printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n", + &dp->dp_saddr, + dp->dp_protocol_major, + dp->dp_protocol_minor); return version; } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 8c5bc85..8eb9501 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -339,8 +339,8 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, sge->length = sizeof(struct rds_header); sge = &recv->r_sge[1]; - sge->addr = sg_dma_address(&recv->r_frag->f_sg); - sge->length = sg_dma_len(&recv->r_frag->f_sg); + sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg); + sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg); ret = 0; out: @@ -381,7 +381,10 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill) ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, recv->r_ibinc, sg_page(&recv->r_frag->f_sg), - (long) sg_dma_address(&recv->r_frag->f_sg), ret); + (long) ib_sg_dma_address( + ic->i_cm_id->device, + &recv->r_frag->f_sg), + ret); if (ret) { rds_ib_conn_error(conn, "recv post on " "%pI4 returned %d, disconnecting and " diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d2922c0..51561ea 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -919,7 +919,7 @@ ok: q->now = ktime_to_ns(ktime_get()); start_at = jiffies; - next_event = q->now + 5 * NSEC_PER_SEC; + next_event = q->now + 5LLU * NSEC_PER_SEC; for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index a9edd2e..7521d94 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -66,12 +66,36 @@ config SCTP_DBG_OBJCNT 'cat /proc/net/sctp/sctp_dbg_objcnt' If unsure, say N +choice + prompt "Default SCTP cookie HMAC encoding" + default SCTP_DEFAULT_COOKIE_HMAC_MD5 + help + This option sets the default sctp cookie hmac algorithm + when in doubt select 'md5' + +config SCTP_DEFAULT_COOKIE_HMAC_MD5 + bool "Enable optional MD5 hmac cookie generation" + help + Enable optional MD5 hmac based SCTP cookie generation + select SCTP_COOKIE_HMAC_MD5 + +config SCTP_DEFAULT_COOKIE_HMAC_SHA1 + bool "Enable optional SHA1 hmac cookie generation" + help + Enable optional SHA1 hmac based SCTP cookie generation + select SCTP_COOKIE_HMAC_SHA1 + +config SCTP_DEFAULT_COOKIE_HMAC_NONE + bool "Use no hmac alg in SCTP cookie generation" + help + Use no hmac algorithm in SCTP cookie generation + +endchoice config SCTP_COOKIE_HMAC_MD5 bool "Enable optional MD5 hmac cookie generation" help Enable optional MD5 hmac based SCTP cookie generation - default y select CRYPTO_HMAC if SCTP_COOKIE_HMAC_MD5 select CRYPTO_MD5 if SCTP_COOKIE_HMAC_MD5 @@ -79,7 +103,6 @@ config SCTP_COOKIE_HMAC_SHA1 bool "Enable optional SHA1 hmac cookie generation" help Enable optional SHA1 hmac based SCTP cookie generation - default y select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1 select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1 diff --git a/net/sctp/probe.c b/net/sctp/probe.c index bc6cd75..5f7518d 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -122,7 +122,8 @@ static const struct file_operations sctpprobe_fops = { .llseek = noop_llseek, }; -sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep, +sctp_disposition_t jsctp_sf_eat_sack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2c7785b..f898b1c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1191,9 +1191,9 @@ static int __net_init sctp_net_init(struct net *net) net->sctp.cookie_preserve_enable = 1; /* Default sctp sockets to use md5 as their hmac alg */ -#if defined (CONFIG_CRYPTO_MD5) +#if defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5) net->sctp.sctp_hmac_alg = "md5"; -#elif defined (CONFIG_CRYPTO_SHA1) +#elif defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1) net->sctp.sctp_hmac_alg = "sha1"; #else net->sctp.sctp_hmac_alg = NULL; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 909dc0c..6e5c824 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -192,17 +192,23 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct const void *q; unsigned int seclen; unsigned int timeout; + unsigned long now = jiffies; u32 window_size; int ret; - /* First unsigned int gives the lifetime (in seconds) of the cred */ + /* First unsigned int gives the remaining lifetime in seconds of the + * credential - e.g. the remaining TGT lifetime for Kerberos or + * the -t value passed to GSSD. + */ p = simple_get_bytes(p, end, &timeout, sizeof(timeout)); if (IS_ERR(p)) goto err; if (timeout == 0) timeout = GSSD_MIN_TIMEOUT; - ctx->gc_expiry = jiffies + (unsigned long)timeout * HZ * 3 / 4; - /* Sequence number window. Determines the maximum number of simultaneous requests */ + ctx->gc_expiry = now + ((unsigned long)timeout * HZ); + /* Sequence number window. Determines the maximum number of + * simultaneous requests + */ p = simple_get_bytes(p, end, &window_size, sizeof(window_size)); if (IS_ERR(p)) goto err; @@ -237,9 +243,12 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct p = ERR_PTR(ret); goto err; } + dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u\n", + __func__, ctx->gc_expiry, now, timeout); return q; err: - dprintk("RPC: %s returning %ld\n", __func__, -PTR_ERR(p)); + dprintk("RPC: %s returns %ld gc_expiry %lu now %lu timeout %u\n", + __func__, -PTR_ERR(p), ctx->gc_expiry, now, timeout); return p; } diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index a9c0bbc..890a299 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -59,7 +59,7 @@ static void xprt_free_allocation(struct rpc_rqst *req) struct xdr_buf *xbufp; dprintk("RPC: free allocations for req= %p\n", req); - BUG_ON(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); xbufp = &req->rq_private_buf; free_page((unsigned long)xbufp->head[0].iov_base); xbufp = &req->rq_snd_buf; @@ -191,7 +191,9 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) dprintk("RPC: destroy backchannel transport\n"); - BUG_ON(max_reqs == 0); + if (max_reqs == 0) + goto out; + spin_lock_bh(&xprt->bc_pa_lock); xprt_dec_alloc_count(xprt, max_reqs); list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { @@ -202,6 +204,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) } spin_unlock_bh(&xprt->bc_pa_lock); +out: dprintk("RPC: backchannel list empty= %s\n", list_empty(&xprt->bc_pa_list) ? "true" : "false"); } @@ -255,7 +258,7 @@ void xprt_free_bc_request(struct rpc_rqst *req) dprintk("RPC: free backchannel req=%p\n", req); smp_mb__before_clear_bit(); - BUG_ON(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); smp_mb__after_clear_bit(); diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c index 0b2eb38..15c7a8a 100644 --- a/net/sunrpc/bc_svc.c +++ b/net/sunrpc/bc_svc.c @@ -53,7 +53,7 @@ int bc_send(struct rpc_rqst *req) if (IS_ERR(task)) ret = PTR_ERR(task); else { - BUG_ON(atomic_read(&task->tk_count) != 1); + WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); ret = task->tk_status; rpc_put_task(task); } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index fc2f7aa..9afa439 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -775,11 +775,11 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, if (rp->q.list.next == &cd->queue) { spin_unlock(&queue_lock); mutex_unlock(&inode->i_mutex); - BUG_ON(rp->offset); + WARN_ON_ONCE(rp->offset); return 0; } rq = container_of(rp->q.list.next, struct cache_request, q.list); - BUG_ON(rq->q.reader); + WARN_ON_ONCE(rq->q.reader); if (rp->offset == 0) rq->readers++; spin_unlock(&queue_lock); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cdc7564..507b5e8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -132,8 +132,10 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, int error; dir = rpc_d_lookup_sb(sb, dir_name); - if (dir == NULL) + if (dir == NULL) { + pr_info("RPC: pipefs directory doesn't exist: %s\n", dir_name); return dir; + } for (;;) { q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); name[sizeof(name) - 1] = '\0'; @@ -192,7 +194,8 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event, case RPC_PIPEFS_MOUNT: dentry = rpc_setup_pipedir_sb(sb, clnt, clnt->cl_program->pipe_dir_name); - BUG_ON(dentry == NULL); + if (!dentry) + return -ENOENT; if (IS_ERR(dentry)) return PTR_ERR(dentry); clnt->cl_dentry = dentry; @@ -234,7 +237,7 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event) spin_lock(&sn->rpc_client_lock); list_for_each_entry(clnt, &sn->all_clients, cl_clients) { if (clnt->cl_program->pipe_dir_name == NULL) - break; + continue; if (rpc_clnt_skip_event(clnt, event)) continue; if (atomic_inc_not_zero(&clnt->cl_count) == 0) @@ -552,7 +555,7 @@ EXPORT_SYMBOL_GPL(rpc_clone_client); * rpc_clone_client_set_auth - Clone an RPC client structure and set its auth * * @clnt: RPC client whose parameters are copied - * @auth: security flavor for new client + * @flavor: security flavor for new client * * Returns a fresh RPC client or an ERR_PTR. */ @@ -607,6 +610,8 @@ EXPORT_SYMBOL_GPL(rpc_killall_tasks); */ void rpc_shutdown_client(struct rpc_clnt *clnt) { + might_sleep(); + dprintk_rcu("RPC: shutting down %s client for %s\n", clnt->cl_protname, rcu_dereference(clnt->cl_xprt)->servername); @@ -693,21 +698,19 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, const struct rpc_program *program, u32 vers) { + struct rpc_create_args args = { + .program = program, + .prognumber = program->number, + .version = vers, + .authflavor = old->cl_auth->au_flavor, + .client_name = old->cl_principal, + }; struct rpc_clnt *clnt; - const struct rpc_version *version; int err; - BUG_ON(vers >= program->nrvers || !program->version[vers]); - version = program->version[vers]; - clnt = rpc_clone_client(old); + clnt = __rpc_clone_client(&args, old); if (IS_ERR(clnt)) goto out; - clnt->cl_procinfo = version->procs; - clnt->cl_maxproc = version->nrprocs; - clnt->cl_protname = program->name; - clnt->cl_prog = program->number; - clnt->cl_vers = version->number; - clnt->cl_stats = program->stats; err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); @@ -832,7 +835,12 @@ int rpc_call_sync(struct rpc_clnt *clnt, const struct rpc_message *msg, int flag }; int status; - BUG_ON(flags & RPC_TASK_ASYNC); + WARN_ON_ONCE(flags & RPC_TASK_ASYNC); + if (flags & RPC_TASK_ASYNC) { + rpc_release_calldata(task_setup_data.callback_ops, + task_setup_data.callback_data); + return -EINVAL; + } task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) @@ -908,7 +916,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, task->tk_action = call_bc_transmit; atomic_inc(&task->tk_count); - BUG_ON(atomic_read(&task->tk_count) != 2); + WARN_ON_ONCE(atomic_read(&task->tk_count) != 2); rpc_execute(task); out: @@ -1368,6 +1376,7 @@ call_refreshresult(struct rpc_task *task) return; case -ETIMEDOUT: rpc_delay(task, 3*HZ); + case -EKEYEXPIRED: case -EAGAIN: status = -EACCES; if (!task->tk_cred_retry) @@ -1654,7 +1663,6 @@ call_transmit(struct rpc_task *task) task->tk_action = call_transmit_status; /* Encode here so that rpcsec_gss can use correct sequence number. */ if (rpc_task_need_encode(task)) { - BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); rpc_xdr_encode(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) { @@ -1738,7 +1746,6 @@ call_bc_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - BUG_ON(task->tk_status != 0); task->tk_status = xprt_prepare_transmit(task); if (task->tk_status == -EAGAIN) { /* @@ -1785,7 +1792,7 @@ call_bc_transmit(struct rpc_task *task) * We were unable to reply and will have to drop the * request. The server should reconnect and retransmit. */ - BUG_ON(task->tk_status == -EAGAIN); + WARN_ON_ONCE(task->tk_status == -EAGAIN); printk(KERN_NOTICE "RPC: Could not send backchannel reply " "error: %d\n", task->tk_status); break; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 80f5dd2..fd10981 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1093,7 +1093,7 @@ void rpc_put_sb_net(const struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - BUG_ON(sn->pipefs_sb == NULL); + WARN_ON(sn->pipefs_sb == NULL); mutex_unlock(&sn->pipefs_sb_lock); } EXPORT_SYMBOL_GPL(rpc_put_sb_net); @@ -1152,14 +1152,19 @@ static void rpc_kill_sb(struct super_block *sb) struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_lock(&sn->pipefs_sb_lock); + if (sn->pipefs_sb != sb) { + mutex_unlock(&sn->pipefs_sb_lock); + goto out; + } sn->pipefs_sb = NULL; mutex_unlock(&sn->pipefs_sb_lock); - put_net(net); dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net, NET_NAME(net)); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); + put_net(net); +out: kill_litter_super(sb); } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index a70acae..795a0f4 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -23,7 +23,6 @@ #include <linux/errno.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <linux/nsproxy.h> #include <net/ipv6.h> #include <linux/sunrpc/clnt.h> @@ -884,7 +883,10 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string, u32 len; len = strlen(string); - BUG_ON(len > maxstrlen); + WARN_ON_ONCE(len > maxstrlen); + if (len > maxstrlen) + /* truncate and hope for the best */ + len = maxstrlen; p = xdr_reserve_space(xdr, 4 + len); xdr_encode_opaque(p, string, len); } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6357fcb..bfa3171 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -98,6 +98,23 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } +static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) +{ + queue->priority = priority; +} + +static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) +{ + queue->owner = pid; + queue->nr = RPC_BATCH_COUNT; +} + +static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) +{ + rpc_set_waitqueue_priority(queue, queue->maxpriority); + rpc_set_waitqueue_owner(queue, 0); +} + /* * Add new request to a priority queue. */ @@ -109,9 +126,11 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *t; INIT_LIST_HEAD(&task->u.tk_wait.links); - q = &queue->tasks[queue_priority]; if (unlikely(queue_priority > queue->maxpriority)) - q = &queue->tasks[queue->maxpriority]; + queue_priority = queue->maxpriority; + if (queue_priority > queue->priority) + rpc_set_waitqueue_priority(queue, queue_priority); + q = &queue->tasks[queue_priority]; list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_owner == task->tk_owner) { list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); @@ -133,7 +152,9 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task, unsigned char queue_priority) { - BUG_ON (RPC_IS_QUEUED(task)); + WARN_ON_ONCE(RPC_IS_QUEUED(task)); + if (RPC_IS_QUEUED(task)) + return; if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task, queue_priority); @@ -178,24 +199,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas task->tk_pid, queue, rpc_qname(queue)); } -static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) -{ - queue->priority = priority; - queue->count = 1 << (priority * 2); -} - -static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) -{ - queue->owner = pid; - queue->nr = RPC_BATCH_COUNT; -} - -static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) -{ - rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_owner(queue, 0); -} - static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) { int i; @@ -334,7 +337,7 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, __rpc_add_wait_queue(q, task, queue_priority); - BUG_ON(task->tk_callback != NULL); + WARN_ON_ONCE(task->tk_callback != NULL); task->tk_callback = action; __rpc_add_timer(q, task); } @@ -343,7 +346,12 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action) { /* We shouldn't ever put an inactive task to sleep */ - BUG_ON(!RPC_IS_ACTIVATED(task)); + WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)); + if (!RPC_IS_ACTIVATED(task)) { + task->tk_status = -EIO; + rpc_put_task_async(task); + return; + } /* * Protect the queue operations. @@ -358,7 +366,12 @@ void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, int priority) { /* We shouldn't ever put an inactive task to sleep */ - BUG_ON(!RPC_IS_ACTIVATED(task)); + WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)); + if (!RPC_IS_ACTIVATED(task)) { + task->tk_status = -EIO; + rpc_put_task_async(task); + return; + } /* * Protect the queue operations. @@ -367,6 +380,7 @@ void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW); spin_unlock_bh(&q->lock); } +EXPORT_SYMBOL_GPL(rpc_sleep_on_priority); /** * __rpc_do_wake_up_task - wake up a single rpc_task @@ -451,8 +465,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q /* * Check if we need to switch queues. */ - if (--queue->count) - goto new_owner; + goto new_owner; } /* @@ -697,7 +710,9 @@ static void __rpc_execute(struct rpc_task *task) dprintk("RPC: %5u __rpc_execute flags=0x%x\n", task->tk_pid, task->tk_flags); - BUG_ON(RPC_IS_QUEUED(task)); + WARN_ON_ONCE(RPC_IS_QUEUED(task)); + if (RPC_IS_QUEUED(task)) + return; for (;;) { void (*do_action)(struct rpc_task *); @@ -919,16 +934,35 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) return task; } +/* + * rpc_free_task - release rpc task and perform cleanups + * + * Note that we free up the rpc_task _after_ rpc_release_calldata() + * in order to work around a workqueue dependency issue. + * + * Tejun Heo states: + * "Workqueue currently considers two work items to be the same if they're + * on the same address and won't execute them concurrently - ie. it + * makes a work item which is queued again while being executed wait + * for the previous execution to complete. + * + * If a work function frees the work item, and then waits for an event + * which should be performed by another work item and *that* work item + * recycles the freed work item, it can create a false dependency loop. + * There really is no reliable way to detect this short of verifying + * every memory free." + * + */ static void rpc_free_task(struct rpc_task *task) { - const struct rpc_call_ops *tk_ops = task->tk_ops; - void *calldata = task->tk_calldata; + unsigned short tk_flags = task->tk_flags; + + rpc_release_calldata(task->tk_ops, task->tk_calldata); - if (task->tk_flags & RPC_TASK_DYNAMIC) { + if (tk_flags & RPC_TASK_DYNAMIC) { dprintk("RPC: %5u freeing task\n", task->tk_pid); mempool_free(task, rpc_task_mempool); } - rpc_release_calldata(tk_ops, calldata); } static void rpc_async_release(struct work_struct *work) @@ -938,8 +972,7 @@ static void rpc_async_release(struct work_struct *work) static void rpc_release_resources_task(struct rpc_task *task) { - if (task->tk_rqstp) - xprt_release(task); + xprt_release(task); if (task->tk_msg.rpc_cred) { put_rpccred(task->tk_msg.rpc_cred); task->tk_msg.rpc_cred = NULL; @@ -981,7 +1014,7 @@ static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); - BUG_ON (RPC_IS_QUEUED(task)); + WARN_ON_ONCE(RPC_IS_QUEUED(task)); rpc_release_resources_task(task); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 3ee7461..dbf12ac 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -20,7 +20,6 @@ #include <linux/module.h> #include <linux/kthread.h> #include <linux/slab.h> -#include <linux/nsproxy.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/xdr.h> @@ -324,7 +323,9 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) * The caller checks for sv_nrpools > 1, which * implies that we've been initialized. */ - BUG_ON(m->count == 0); + WARN_ON_ONCE(m->count == 0); + if (m->count == 0) + return; switch (m->mode) { case SVC_POOL_PERCPU: @@ -585,7 +586,9 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) * We assume one is at most one page */ arghi = 0; - BUG_ON(pages > RPCSVC_MAXPAGES); + WARN_ON_ONCE(pages > RPCSVC_MAXPAGES); + if (pages > RPCSVC_MAXPAGES) + pages = RPCSVC_MAXPAGES; while (pages) { struct page *p = alloc_pages_node(node, GFP_KERNEL, 0); if (!p) @@ -946,7 +949,9 @@ int svc_register(const struct svc_serv *serv, struct net *net, unsigned int i; int error = 0; - BUG_ON(proto == 0 && port == 0); + WARN_ON_ONCE(proto == 0 && port == 0); + if (proto == 0 && port == 0) + return -EINVAL; for (progp = serv->sv_program; progp; progp = progp->pg_next) { for (i = 0; i < progp->pg_nvers; i++) { @@ -1035,7 +1040,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) } /* - * Printk the given error with the address of the client that caused it. + * dprintk the given error with the address of the client that caused it. */ static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) @@ -1049,8 +1054,7 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - net_warn_ratelimited("svc: %s: %pV", - svc_print_addr(rqstp, buf, sizeof(buf)), &vaf); + dprintk("svc: %s: %pV", svc_print_addr(rqstp, buf, sizeof(buf)), &vaf); va_end(args); } @@ -1299,7 +1303,7 @@ svc_process(struct svc_rqst *rqstp) * Setup response xdr_buf. * Initially it has just one page */ - rqstp->rq_resused = 1; + rqstp->rq_next_page = &rqstp->rq_respages[1]; resv->iov_base = page_address(rqstp->rq_respages[0]); resv->iov_len = 0; rqstp->rq_res.pages = rqstp->rq_respages + 1; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 194d865..b8e47fa 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -218,7 +218,9 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, */ static void svc_xprt_received(struct svc_xprt *xprt) { - BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); + WARN_ON_ONCE(!test_bit(XPT_BUSY, &xprt->xpt_flags)); + if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) + return; /* As soon as we clear busy, the xprt could be closed and * 'put', so we need a reference to call svc_xprt_enqueue with: */ @@ -577,7 +579,10 @@ int svc_alloc_arg(struct svc_rqst *rqstp) /* now allocate needed pages. If we get a failure, sleep briefly */ pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; - BUG_ON(pages >= RPCSVC_MAXPAGES); + WARN_ON_ONCE(pages >= RPCSVC_MAXPAGES); + if (pages >= RPCSVC_MAXPAGES) + /* use as many pages as possible */ + pages = RPCSVC_MAXPAGES - 1; for (i = 0; i < pages ; i++) while (rqstp->rq_pages[i] == NULL) { struct page *p = alloc_page(GFP_KERNEL); @@ -926,7 +931,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) list_del_init(&xprt->xpt_list); - BUG_ON(!list_empty(&xprt->xpt_ready)); + WARN_ON_ONCE(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 03827ce..0a148c9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -84,7 +84,11 @@ static struct lock_class_key svc_slock_key[2]; static void svc_reclassify_socket(struct socket *sock) { struct sock *sk = sock->sk; - BUG_ON(sock_owned_by_user(sk)); + + WARN_ON_ONCE(sock_owned_by_user(sk)); + if (sock_owned_by_user(sk)) + return; + switch (sk->sk_family) { case AF_INET: sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", @@ -601,6 +605,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = rqstp->rq_pages + 1 + DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); } + rqstp->rq_next_page = rqstp->rq_respages+1; if (serv->sv_stats) serv->sv_stats->netudpcnt++; @@ -874,9 +879,9 @@ static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) return 0; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) @@ -893,9 +898,9 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) return; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { svsk->sk_pages[i] = rqstp->rq_pages[i]; @@ -907,9 +912,9 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) goto out; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { BUG_ON(svsk->sk_pages[i] == NULL); @@ -918,13 +923,12 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) } out: svsk->sk_tcplen = 0; + svsk->sk_datalen = 0; } /* - * Receive data. + * Receive fragment record header. * If we haven't gotten the record length yet, get the next four bytes. - * Otherwise try to gobble up as much as possible up to the complete - * record length. */ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { @@ -950,32 +954,16 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return -EAGAIN; } - svsk->sk_reclen = ntohl(svsk->sk_reclen); - if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { - /* FIXME: technically, a record can be fragmented, - * and non-terminal fragments will not have the top - * bit set in the fragment length header. - * But apparently no known nfs clients send fragmented - * records. */ - net_notice_ratelimited("RPC: multiple fragments per record not supported\n"); - goto err_delete; - } - - svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; - dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); - if (svsk->sk_reclen > serv->sv_max_mesg) { - net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n", - (unsigned long)svsk->sk_reclen); + dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk)); + if (svc_sock_reclen(svsk) + svsk->sk_datalen > + serv->sv_max_mesg) { + net_notice_ratelimited("RPC: fragment too large: %d\n", + svc_sock_reclen(svsk)); goto err_delete; } } - if (svsk->sk_reclen < 8) - goto err_delete; /* client is nuts. */ - - len = svsk->sk_reclen; - - return len; + return svc_sock_reclen(svsk); error: dprintk("RPC: TCP recv_record got %d\n", len); return len; @@ -1019,7 +1007,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) if (dst->iov_len < src->iov_len) return -EAGAIN; /* whatever; just giving up. */ memcpy(dst->iov_base, src->iov_base, src->iov_len); - xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); rqstp->rq_arg.len = 0; return 0; } @@ -1038,6 +1026,17 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) return i; } +static void svc_tcp_fragment_received(struct svc_sock *svsk) +{ + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + dprintk("svc: TCP %s record (%d bytes)\n", + svc_sock_final_rec(svsk) ? "final" : "nonfinal", + svc_sock_reclen(svsk)); + svsk->sk_tcplen = 0; + svsk->sk_reclen = 0; +} /* * Receive data from a TCP socket. @@ -1064,29 +1063,39 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto error; base = svc_tcp_restore_pages(svsk, rqstp); - want = svsk->sk_reclen - base; + want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); vec = rqstp->rq_vec; pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], - svsk->sk_reclen); + svsk->sk_datalen + want); rqstp->rq_respages = &rqstp->rq_pages[pnum]; + rqstp->rq_next_page = rqstp->rq_respages + 1; /* Now receive data */ len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); - if (len >= 0) + if (len >= 0) { svsk->sk_tcplen += len; - if (len != want) { + svsk->sk_datalen += len; + } + if (len != want || !svc_sock_final_rec(svsk)) { svc_tcp_save_pages(svsk, rqstp); if (len < 0 && len != -EAGAIN) - goto err_other; - dprintk("svc: incomplete TCP record (%d of %d)\n", - svsk->sk_tcplen, svsk->sk_reclen); + goto err_delete; + if (len == want) + svc_tcp_fragment_received(svsk); + else + dprintk("svc: incomplete TCP record (%d of %d)\n", + (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)), + svc_sock_reclen(svsk)); goto err_noclose; } - rqstp->rq_arg.len = svsk->sk_reclen; + if (svc_sock_reclen(svsk) < 8) + goto err_delete; /* client is nuts. */ + + rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; @@ -1103,11 +1112,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) len = receive_cb_reply(svsk, rqstp); /* Reset TCP read info */ - svsk->sk_reclen = 0; - svsk->sk_tcplen = 0; - /* If we have more data, signal svc_xprt_enqueue() to try again */ - if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + svsk->sk_datalen = 0; + svc_tcp_fragment_received(svsk); if (len < 0) goto error; @@ -1116,15 +1122,14 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); return rqstp->rq_arg.len; error: if (len != -EAGAIN) - goto err_other; + goto err_delete; dprintk("RPC: TCP recvfrom got EAGAIN\n"); return 0; -err_other: +err_delete: printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", svsk->sk_xprt.xpt_server->sv_name, -len); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1301,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 08f50af..5605563 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -318,7 +318,10 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) tail = buf->tail; head = buf->head; - BUG_ON (len > head->iov_len); + + WARN_ON_ONCE(len > head->iov_len); + if (len > head->iov_len) + len = head->iov_len; /* Shift the tail first */ if (tail->iov_len != 0) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index bd462a5..33811db 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1136,10 +1136,18 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) void xprt_release(struct rpc_task *task) { struct rpc_xprt *xprt; - struct rpc_rqst *req; + struct rpc_rqst *req = task->tk_rqstp; - if (!(req = task->tk_rqstp)) + if (req == NULL) { + if (task->tk_client) { + rcu_read_lock(); + xprt = rcu_dereference(task->tk_client->cl_xprt); + if (xprt->snd_task == task) + xprt_release_write(xprt, task); + rcu_read_unlock(); + } return; + } xprt = req->rq_xprt; if (task->tk_ops->rpc_count_stats != NULL) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 41cb63b..0ce7552 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -521,11 +521,11 @@ next_sge: rqstp->rq_pages[ch_no] = NULL; /* - * Detach res pages. svc_release must see a resused count of - * zero or it will attempt to put them. + * Detach res pages. If svc_release sees any it will attempt to + * put them. */ - while (rqstp->rq_resused) - rqstp->rq_respages[--rqstp->rq_resused] = NULL; + while (rqstp->rq_next_page != rqstp->rq_respages) + *(--rqstp->rq_next_page) = NULL; return err; } @@ -550,7 +550,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp, /* rq_respages starts after the last arg page */ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; - rqstp->rq_resused = 0; + rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no]; /* Rebuild rq_arg head and tail. */ rqstp->rq_arg.head[0] = head->arg.head[0]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 42eb7ba..c1d124d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -548,6 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma, int sge_no; int sge_bytes; int page_no; + int pages; int ret; /* Post a recv buffer to handle another request. */ @@ -611,7 +612,8 @@ static int send_reply(struct svcxprt_rdma *rdma, * respages array. They are our pages until the I/O * completes. */ - for (page_no = 0; page_no < rqstp->rq_resused; page_no++) { + pages = rqstp->rq_next_page - rqstp->rq_respages; + for (page_no = 0; page_no < pages; page_no++) { ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->count++; rqstp->rq_respages[page_no] = NULL; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 75853ca..68b0a81 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1746,7 +1746,6 @@ static inline void xs_reclassify_socketu(struct socket *sock) { struct sock *sk = sock->sk; - BUG_ON(sock_owned_by_user(sk)); sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC", &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]); } @@ -1755,7 +1754,6 @@ static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; - BUG_ON(sock_owned_by_user(sk)); sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); } @@ -1764,13 +1762,16 @@ static inline void xs_reclassify_socket6(struct socket *sock) { struct sock *sk = sock->sk; - BUG_ON(sock_owned_by_user(sk)); sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); } static inline void xs_reclassify_socket(int family, struct socket *sock) { + WARN_ON_ONCE(sock_owned_by_user(sock->sk)); + if (sock_owned_by_user(sock->sk)) + return; + switch (family) { case AF_LOCAL: xs_reclassify_socketu(sock); @@ -1901,6 +1902,10 @@ static void xs_local_setup_socket(struct work_struct *work) dprintk("RPC: xprt %p: socket %s does not exist\n", xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); break; + case -ECONNREFUSED: + dprintk("RPC: xprt %p: connection refused for %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + break; default: printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n", __func__, -status, @@ -2329,9 +2334,11 @@ static void *bc_malloc(struct rpc_task *task, size_t size) struct page *page; struct rpc_buffer *buf; - BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer)); - page = alloc_page(GFP_KERNEL); + WARN_ON_ONCE(size > PAGE_SIZE - sizeof(struct rpc_buffer)); + if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) + return NULL; + page = alloc_page(GFP_KERNEL); if (!page) return NULL; @@ -2393,7 +2400,6 @@ static int bc_send_request(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct svc_xprt *xprt; - struct svc_sock *svsk; u32 len; dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid)); @@ -2401,7 +2407,6 @@ static int bc_send_request(struct rpc_task *task) * Get the server socket associated with this callback xprt */ xprt = req->rq_xprt->bc_xprt; - svsk = container_of(xprt, struct svc_sock, sk_xprt); /* * Grab the mutex to serialize data as the connection is shared diff --git a/net/wireless/core.c b/net/wireless/core.c index 14d9904..b677eab 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -866,8 +866,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; - if (!dev->ethtool_ops) - dev->ethtool_ops = &cfg80211_ethtool_ops; + netdev_set_default_ethtool_ops(dev, &cfg80211_ethtool_ops); if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6e53089..82c4fc7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2365,7 +2365,6 @@ int set_regdom(const struct ieee80211_regdomain *rd) return r; } -#ifdef CONFIG_HOTPLUG int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) { if (last_request && !last_request->processed) { @@ -2377,12 +2376,6 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } -#else -int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - return -ENODEV; -} -#endif /* CONFIG_HOTPLUG */ void wiphy_regulatory_register(struct wiphy *wiphy) { diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 9bf6d5e..1f6f01e 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -77,13 +77,11 @@ static void wiphy_dev_release(struct device *dev) cfg80211_dev_free(rdev); } -#ifdef CONFIG_HOTPLUG static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env) { /* TODO, we probably need stuff here */ return 0; } -#endif static int wiphy_suspend(struct device *dev, pm_message_t state) { @@ -134,9 +132,7 @@ struct class ieee80211_class = { .owner = THIS_MODULE, .dev_release = wiphy_dev_release, .dev_attrs = ieee80211_dev_attrs, -#ifdef CONFIG_HOTPLUG .dev_uevent = wiphy_uevent, -#endif .suspend = wiphy_suspend, .resume = wiphy_resume, .ns_type = &net_ns_type_operations, |