diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2016-07-04 15:15:03 (GMT) |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2016-07-04 15:15:03 (GMT) |
commit | 7813029a509c0c92a5b16d846defaa50a2d3d7f1 (patch) | |
tree | 1962ab81537b7155d185f4b0b06c0323ff4dc509 /net | |
parent | 3c9a6793bde1feb368cd6ef113c08a40a37f7517 (diff) | |
parent | a99cde438de0c4c0cecc1d1af1a55a75b10bfdef (diff) | |
download | linux-7813029a509c0c92a5b16d846defaa50a2d3d7f1.tar.xz |
Merge 4.7-rc6 into staging-next
We want the staging fixes in here as well.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net')
61 files changed, 433 insertions, 345 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fbd0acf..2fdebab 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock) release_sock(sk); ax25_disconnect(ax25, 0); lock_sock(sk); - ax25_destroy_socket(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_destroy_socket(ax25); break; case AX25_STATE_3: diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 951cd57..5237dff 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 004467c9..2c0d6ef 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 3b78e84..655a7d4 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - ax25_stop_heartbeat(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); ax25_stop_t3timer(ax25); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index e3857ed..6c2901a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -374,6 +374,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (skb_cow(skb, ETH_HLEN) < 0) goto out; + ethhdr = eth_hdr(skb); icmph = (struct batadv_icmp_header *)skb->data; icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph; if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 343d2c9..287a387 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1033,7 +1033,9 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface) static void batadv_softif_destroy_netlink(struct net_device *soft_iface, struct list_head *head) { + struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_hard_iface *hard_iface; + struct batadv_softif_vlan *vlan; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface == soft_iface) @@ -1041,6 +1043,13 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface, BATADV_IF_CLEANUP_KEEP); } + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_put(vlan); + } + batadv_sysfs_del_meshif(soft_iface); unregister_netdevice_queue(soft_iface, head); } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index feaf492b..57ec87f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -650,8 +650,10 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); - if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d", - addr, BATADV_PRINT_VID(vid))) { + if (!vlan) { + net_ratelimited_function(batadv_info, soft_iface, + "adding TT local entry %pM to non-existent VLAN %d\n", + addr, BATADV_PRINT_VID(vid)); kfree(tt_local); tt_local = NULL; goto out; @@ -691,7 +693,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (unlikely(hash_added != 0)) { /* remove the reference for the hash */ batadv_tt_local_entry_put(tt_local); - batadv_softif_vlan_put(vlan); goto out; } @@ -2269,6 +2270,29 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, return crc; } +/** + * batadv_tt_req_node_release - free tt_req node entry + * @ref: kref pointer of the tt req_node entry + */ +static void batadv_tt_req_node_release(struct kref *ref) +{ + struct batadv_tt_req_node *tt_req_node; + + tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount); + + kfree(tt_req_node); +} + +/** + * batadv_tt_req_node_put - decrement the tt_req_node refcounter and + * possibly release it + * @tt_req_node: tt_req_node to be free'd + */ +static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node) +{ + kref_put(&tt_req_node->refcount, batadv_tt_req_node_release); +} + static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node; @@ -2278,7 +2302,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2315,7 +2339,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) if (batadv_has_timed_out(node->issued_at, BATADV_TT_REQUEST_TIMEOUT)) { hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } } spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2347,9 +2371,11 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, if (!tt_req_node) goto unlock; + kref_init(&tt_req_node->refcount); ether_addr_copy(tt_req_node->addr, orig_node->orig); tt_req_node->issued_at = jiffies; + kref_get(&tt_req_node->refcount); hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list); unlock: spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2613,13 +2639,19 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, out: if (primary_if) batadv_hardif_put(primary_if); + if (ret && tt_req_node) { spin_lock_bh(&bat_priv->tt.req_list_lock); - /* hlist_del_init() verifies tt_req_node still is in the list */ - hlist_del_init(&tt_req_node->list); + if (!hlist_unhashed(&tt_req_node->list)) { + hlist_del_init(&tt_req_node->list); + batadv_tt_req_node_put(tt_req_node); + } spin_unlock_bh(&bat_priv->tt.req_list_lock); - kfree(tt_req_node); } + + if (tt_req_node) + batadv_tt_req_node_put(tt_req_node); + kfree(tvlv_tt_data); return ret; } @@ -3055,7 +3087,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, if (!batadv_compare_eth(node->addr, resp_src)) continue; hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 6a577f4..ba846b0 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1137,11 +1137,13 @@ struct batadv_tt_change_node { * struct batadv_tt_req_node - data to keep track of the tt requests in flight * @addr: mac address address of the originator this request was sent to * @issued_at: timestamp used for purging stale tt requests + * @refcount: number of contexts the object is used by * @list: list node for batadv_priv_tt::req_list */ struct batadv_tt_req_node { u8 addr[ETH_ALEN]; unsigned long issued_at; + struct kref refcount; struct hlist_node list; }; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1607977..43d2cd8 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -213,8 +213,7 @@ drop: } EXPORT_SYMBOL_GPL(br_handle_frame_finish); -/* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +static void __br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -222,6 +221,14 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu /* check if vlan is allowed, to avoid spoofing */ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); +} + +/* note: already called with rcu_read_lock */ +static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + + __br_handle_local_finish(skb); BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; br_pass_frame_up(skb); @@ -274,7 +281,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) if (p->br->stp_enabled == BR_NO_STP || fwd_mask & (1u << dest[5])) goto forward; - break; + *pskb = skb; + __br_handle_local_finish(skb); + return RX_HANDLER_PASS; case 0x01: /* IEEE MAC (Pause) */ goto drop; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6852f3c..4384414 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -464,8 +464,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); + br->has_ipv6_addr = 0; return NULL; } + + br->has_ipv6_addr = 1; ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); @@ -1745,6 +1748,7 @@ void br_multicast_init(struct net_bridge *br) br->ip6_other_query.delay_time = 0; br->ip6_querier.port = NULL; #endif + br->has_ipv6_addr = 1; spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a5343c7..85e89f6 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1273,7 +1273,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, struct bridge_vlan_xstats vxi; struct br_vlan_stats stats; - if (vl_idx++ < *prividx) + if (++vl_idx < *prividx) continue; memset(&vxi, 0, sizeof(vxi)); vxi.vid = v->vid; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c7fb5d7..52edecf 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -314,6 +314,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; u8 multicast_query_use_ifaddr:1; + u8 has_ipv6_addr:1; u32 hash_elasticity; u32 hash_max; @@ -588,10 +589,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br) static inline bool __br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_other_query *querier) + struct bridge_mcast_other_query *querier, + const bool is_ipv6) { + bool own_querier_enabled; + + if (br->multicast_querier) { + if (is_ipv6 && !br->has_ipv6_addr) + own_querier_enabled = false; + else + own_querier_enabled = true; + } else { + own_querier_enabled = false; + } + return time_is_before_jiffies(querier->delay_time) && - (br->multicast_querier || timer_pending(&querier->timer)); + (own_querier_enabled || timer_pending(&querier->timer)); } static inline bool br_multicast_querier_exists(struct net_bridge *br, @@ -599,10 +612,12 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, &br->ip4_other_query); + return __br_multicast_querier_exists(br, + &br->ip4_other_query, false); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, &br->ip6_other_query); + return __br_multicast_querier_exists(br, + &br->ip6_other_query, true); #endif default: return false; diff --git a/net/core/filter.c b/net/core/filter.c index 68adb5f..c4b330c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2085,7 +2085,8 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) } static bool sk_filter_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { switch (off) { case offsetof(struct __sk_buff, tc_classid): @@ -2108,7 +2109,8 @@ static bool sk_filter_is_valid_access(int off, int size, } static bool tc_cls_act_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { if (type == BPF_WRITE) { switch (off) { @@ -2123,6 +2125,16 @@ static bool tc_cls_act_is_valid_access(int off, int size, return false; } } + + switch (off) { + case offsetof(struct __sk_buff, data): + *reg_type = PTR_TO_PACKET; + break; + case offsetof(struct __sk_buff, data_end): + *reg_type = PTR_TO_PACKET_END; + break; + } + return __is_valid_access(off, size, type); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 29dd8cc..510cd62 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2469,13 +2469,17 @@ int neigh_xmit(int index, struct net_device *dev, tbl = neigh_tables[index]; if (!tbl) goto out; + rcu_read_lock_bh(); neigh = __neigh_lookup_noref(tbl, addr, dev); if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); - if (IS_ERR(neigh)) + if (IS_ERR(neigh)) { + rcu_read_unlock_bh(); goto out_kfree_skb; + } err = neigh->output(neigh, skb); + rcu_read_unlock_bh(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4779374..d95631d 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -23,6 +23,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); @@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen) { unsigned int len; - len = seqhilen; + len = extralen; len += crypto_aead_ivsize(aead); @@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen) { return crypto_aead_ivsize(aead) ? - PTR_ALIGN((u8 *)tmp + seqhilen, - crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; + PTR_ALIGN((u8 *)tmp + extralen, + crypto_aead_alignmask(aead) + 1) : tmp + extralen; } static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) @@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static void esp_output_done_esn(struct crypto_async_request *base, int err) @@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + struct esp_output_extra *extra; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; @@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int tfclen; int nfrags; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; __be64 seqno; /* skb is pure payload to encrypt */ @@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); - assoclen += seqhilen; + extralen += sizeof(*extra); + assoclen += sizeof(__be32); } - tmp = esp_alloc_tmp(aead, nfrags, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, extralen); if (!tmp) { err = -ENOMEM; goto error; } - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); aead_request_set_callback(req, 0, esp_output_done_esn, skb); } @@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 4c39f4f..de1d119 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -62,26 +62,26 @@ EXPORT_SYMBOL_GPL(gre_del_protocol); /* Fills in tpi and returns header length to be pulled. */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto) + bool *csum_err, __be16 proto, int nhs) { const struct gre_base_hdr *greh; __be32 *options; int hdr_len; - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr)))) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; tpi->flags = gre_flags_to_tnl_flags(greh->flags); hdr_len = gre_calc_hlen(tpi->flags); - if (!pskb_may_pull(skb, hdr_len)) + if (!pskb_may_pull(skb, nhs + hdr_len)) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); tpi->proto = greh->protocol; options = (__be32 *)(greh + 1); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4d2025f..1d000af 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -49,12 +49,6 @@ #include <net/gre.h> #include <net/dst_metadata.h> -#if IS_ENABLED(CONFIG_IPV6) -#include <net/ipv6.h> -#include <net/ip6_fib.h> -#include <net/ip6_route.h> -#endif - /* Problems & solutions -------------------- @@ -217,12 +211,14 @@ static void gre_err(struct sk_buff *skb, u32 info) * by themselves??? */ + const struct iphdr *iph = (struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) { + if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), + iph->ihl * 4) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -338,7 +334,7 @@ static int gre_rcv(struct sk_buff *skb) } #endif - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); if (hdr_len < 0) goto drop; @@ -1121,6 +1117,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; + LIST_HEAD(list_kill); struct ip_tunnel *t; int err; @@ -1136,8 +1133,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, t->collect_md = true; err = ipgre_newlink(net, dev, tb, NULL); - if (err < 0) - goto out; + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. @@ -1146,9 +1145,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, if (err) goto out; + err = rtnl_configure_link(dev, NULL); + if (err < 0) + goto out; + return dev; out: - free_netdev(dev); + ip_tunnel_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(gretap_fb_dev_create); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 2ed9dd2..1d71c40 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,9 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ -__be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +#ifdef IPCONFIG_DYNAMIC +static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +#endif __be32 ic_servaddr = NONE; /* Boot server IP address */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 21a38e2..5ad48ec 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -891,8 +891,10 @@ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c) + if (c) { + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + } return c; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8bd9911..e00e972 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2751,7 +2751,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; struct sk_buff *hole = NULL; - u32 last_lost; + u32 max_segs, last_lost; int mib_idx; int fwd_rexmitting = 0; @@ -2771,6 +2771,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } + max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; int segs; @@ -2784,6 +2785,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) return; + /* In case tcp_shift_skb_data() have aggregated large skbs, + * we need to make sure not sending too bigs TSO packets + */ + segs = min_t(int, segs, max_segs); if (fwd_rexmitting) { begin_fwd: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0ff31d9..ca5e8ea 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); } -static inline int compute_score(struct sock *sk, struct net *net, - __be32 saddr, unsigned short hnum, __be16 sport, - __be32 daddr, __be16 dport, int dif) +static int compute_score(struct sock *sk, struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum, int dif) { int score; struct inet_sock *inet; @@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -/* - * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) - */ -static inline int compute_score2(struct sock *sk, struct net *net, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned int hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - ipv6_only_sock(sk)) - return -1; - - inet = inet_sk(sk); - - if (inet->inet_rcv_saddr != daddr || - inet->inet_num != hnum) - return -1; - - score = (sk->sk_family == PF_INET) ? 2 : 1; - - if (inet->inet_daddr) { - if (inet->inet_daddr != saddr) - return -1; - score += 4; - } - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score += 4; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score += 4; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - static u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) @@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, result = NULL; badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -554,17 +508,22 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. */ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp4_lib_lookup2(net, saddr, sport, - htonl(INADDR_ANY), hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -572,8 +531,8 @@ begin: result = NULL; badness = 0; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, hnum, sport, - daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, + daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -1755,8 +1714,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, return err; } - return skb_checksum_init_zero_check(skb, proto, uh->check, - inet_compute_pseudo); + /* Note, we are only interested in != 0 or == 0, thus the + * force to int. + */ + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); } /* diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4527285..a4fa840 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -98,7 +98,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) - ping_err(skb, offset, info); + ping_err(skb, offset, ntohl(info)); } static int icmpv6_rcv(struct sk_buff *skb); diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index b2025bf..c0cbcb2 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -78,9 +78,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * we accept a checksum of zero here. When we find the socket * for the UDP packet we'll check if that socket allows zero checksum * for IPv6 (set by socket option). + * + * Note, we are only interested in != 0 or == 0, thus the + * force to int. */ - return skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); } EXPORT_SYMBOL(udp6_csum_init); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index fdc9de2..776d145 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb) bool csum_err = false; int hdr_len; - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0); if (hdr_len < 0) goto drop; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f2e2013f8..487ef3b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXMIFS; return c; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 969913d..520b788 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1782,7 +1782,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = 0; + int flags = RT6_LOOKUP_F_IFACE; table = fib6_get_table(net, cfg->fc_table); if (!table) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 0a5a255..0619ac7 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -560,13 +560,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, iph->protocol, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_IPV6, 0); + iph->protocol, 0); err = 0; goto out; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f36c2d0..2255d2b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -738,7 +738,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, u32 label) + u8 tclass, __be32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -911,7 +911,7 @@ out: static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, - u32 label) + __be32 label) { tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, label); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f421c9f..005dc82 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static inline int compute_score(struct sock *sk, struct net *net, - unsigned short hnum, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif) +static int compute_score(struct sock *sk, struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif) { int score; struct inet_sock *inet; @@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, - unsigned short hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - udp_sk(sk)->udp_port_hash != hnum || - sk->sk_family != PF_INET6) - return -1; - - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - - score = 0; - inet = inet_sk(sk); - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, result = NULL; badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. */ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -286,7 +247,7 @@ begin: result = NULL; badness = -1; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 7380087..fda7f47 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -241,6 +241,7 @@ static const struct file_operations kcm_seq_fops = { .open = kcm_seq_open, .read = seq_read, .llseek = seq_lseek, + .release = seq_release_net, }; static struct kcm_seq_muxinfo kcm_seq_muxinfo = { diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 21b1fdf..6a1603b 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -148,14 +148,17 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_sta_cleanup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed; + u32 changed = 0; /* * maybe userspace handles peer allocation and peering, but in either * case the beacon is still generated by the kernel and we might need * an update. */ - changed = mesh_accept_plinks_update(sdata); + if (sdata->u.mesh.user_mpm && + sta->mesh->plink_state == NL80211_PLINK_ESTAB) + changed |= mesh_plink_dec_estab_count(sdata); + changed |= mesh_accept_plinks_update(sdata); if (!sdata->u.mesh.user_mpm) { changed |= mesh_plink_deactivate(sta); del_timer_sync(&sta->mesh->plink_timer); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index db2312e..f204274 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1544,6 +1544,8 @@ void nf_conntrack_cleanup_end(void) nf_conntrack_tstamp_fini(); nf_conntrack_acct_fini(); nf_conntrack_expect_fini(); + + kmem_cache_destroy(nf_conntrack_cachep); } /* diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7b7aa87..2c88187 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2946,24 +2946,20 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, * jumps are already validated for that chain. */ list_for_each_entry(i, &set->bindings, list) { - if (binding->flags & NFT_SET_MAP && + if (i->flags & NFT_SET_MAP && i->chain == binding->chain) goto bind; } + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; set->ops->walk(ctx, set, &iter); - if (iter.err < 0) { - /* Destroy anonymous sets if binding fails */ - if (set->flags & NFT_SET_ANONYMOUS) - nf_tables_set_destroy(ctx, set); - + if (iter.err < 0) return iter.err; - } } bind: binding->chain = ctx->chain; @@ -3192,12 +3188,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; - args.cb = cb; - args.skb = skb; - args.iter.skip = cb->args[0]; - args.iter.count = 0; - args.iter.err = 0; - args.iter.fn = nf_tables_dump_setelem; + args.cb = cb; + args.skb = skb; + args.iter.genmask = nft_genmask_cur(ctx.net); + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; + args.iter.fn = nf_tables_dump_setelem; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -4284,6 +4281,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, binding->chain != chain) continue; + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e9f8dff..fb8b589 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -143,7 +143,7 @@ next_rule: list_for_each_entry_continue_rcu(rule, &chain->rules, list) { /* This rule is not active, skip. */ - if (unlikely(rule->genmask & (1 << gencursor))) + if (unlikely(rule->genmask & gencursor)) continue; rulenum++; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 6fa0165..f39c53a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -189,7 +189,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); @@ -218,7 +217,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, goto cont; if (nft_set_elem_expired(&he->ext)) goto cont; - if (!nft_set_elem_active(&he->ext, genmask)) + if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; elem.priv = he; diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index f762094..7201d57 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -211,7 +211,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { @@ -219,7 +218,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (!nft_set_elem_active(&rbe->ext, genmask)) + if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; elem.priv = rbe; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3d5feed..d843125 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -818,8 +818,18 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, */ state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; __ovs_ct_update_key(key, state, &info->zone, exp->master); - } else - return __ovs_ct_lookup(net, key, info, skb); + } else { + struct nf_conn *ct; + int err; + + err = __ovs_ct_lookup(net, key, info, skb); + if (err) + return err; + + ct = (struct nf_conn *)skb->nfct; + if (ct) + nf_ct_deliver_cached_events(ct); + } return 0; } diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 310cabc..7c2a65a 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -111,7 +111,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even } } - if (conn->c_version < RDS_PROTOCOL(3,1)) { + if (conn->c_version < RDS_PROTOCOL(3, 1)) { printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed," " no longer supported\n", &conn->c_faddr, diff --git a/net/rds/loop.c b/net/rds/loop.c index 6b12b68..814173b 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -95,8 +95,9 @@ out: */ static void rds_loop_inc_free(struct rds_incoming *inc) { - struct rds_message *rm = container_of(inc, struct rds_message, m_inc); - rds_message_put(rm); + struct rds_message *rm = container_of(inc, struct rds_message, m_inc); + + rds_message_put(rm); } /* we need to at least give the thread something to succeed */ diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index c173f69..e381bbc 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -102,7 +102,8 @@ int rds_sysctl_init(void) rds_sysctl_reconnect_min = msecs_to_jiffies(1); rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; - rds_sysctl_reg_table = register_net_sysctl(&init_net,"net/rds", rds_sysctl_rds_table); + rds_sysctl_reg_table = + register_net_sysctl(&init_net, "net/rds", rds_sysctl_rds_table); if (!rds_sysctl_reg_table) return -ENOMEM; return 0; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index ec0602b..7940bab 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -83,7 +83,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); void rds_tcp_xmit_prepare(struct rds_connection *conn); void rds_tcp_xmit_complete(struct rds_connection *conn); int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off); + unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_tcp_write_space(struct sock *sk); /* tcp_stats.c */ diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index fba13d0..f6e95d6 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -54,19 +54,19 @@ void rds_tcp_state_change(struct sock *sk) rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); - switch(sk->sk_state) { - /* ignore connecting sockets as they make progress */ - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - case TCP_ESTABLISHED: - rds_connect_path_complete(conn, RDS_CONN_CONNECTING); - break; - case TCP_CLOSE_WAIT: - case TCP_CLOSE: - rds_conn_drop(conn); - default: - break; + switch (sk->sk_state) { + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); + break; + case TCP_CLOSE_WAIT: + case TCP_CLOSE: + rds_conn_drop(conn); + default: + break; } out: read_unlock_bh(&sk->sk_callback_lock); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 686b1d0..245542c 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -138,7 +138,7 @@ int rds_tcp_accept_one(struct socket *sock) rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; /* rds_connect_path_complete() marks RDS_CONN_UP */ - rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING); + rds_connect_path_complete(conn, RDS_CONN_RESETTING); } } else { rds_tcp_set_callbacks(new_sock, conn); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index c3196f9..6e6a711 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -171,7 +171,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, while (left) { if (!tinc) { tinc = kmem_cache_alloc(rds_tcp_incoming_slab, - arg->gfp); + arg->gfp); if (!tinc) { desc->error = -ENOMEM; goto out; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 22d0f20..618be69 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -66,19 +66,19 @@ void rds_tcp_xmit_complete(struct rds_connection *conn) static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len) { struct kvec vec = { - .iov_base = data, - .iov_len = len, + .iov_base = data, + .iov_len = len, + }; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, }; - struct msghdr msg = { - .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, - }; return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len); } /* the core send_sem serializes this with other xmit and shutdown */ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off) + unsigned int hdr_off, unsigned int sg, unsigned int off) { struct rds_tcp_connection *tc = conn->c_transport_data; int done = 0; @@ -196,7 +196,7 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: diff --git a/net/rds/transport.c b/net/rds/transport.c index f3afd1d..2ffd3e30 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -140,8 +140,7 @@ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, rds_info_iter_unmap(iter); down_read(&rds_trans_sem); - for (i = 0; i < RDS_TRANS_COUNT; i++) - { + for (i = 0; i < RDS_TRANS_COUNT; i++) { trans = transports[i]; if (!trans || !trans->stats_info_copy) continue; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 336774a..c7a0b0d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1118,7 +1118,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_end(skb, nest); ret = skb->len; } else - nla_nest_cancel(skb, nest); + nlmsg_trim(skb, b); nlh->nlmsg_len = skb_tail_pointer(skb) - b; if (NETLINK_CB(cb->skb).portid && ret) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 658046d..ea4a2fe 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -106,9 +106,9 @@ int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_get_meta_u16); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u32), gfp); if (!mi->metaval) return -ENOMEM; @@ -116,9 +116,9 @@ int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) } EXPORT_SYMBOL_GPL(ife_alloc_meta_u32); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u16), gfp); if (!mi->metaval) return -ENOMEM; @@ -240,10 +240,10 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len) + void *val, int len, bool exists) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -251,11 +251,13 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); request_module("ifemeta%u", metaid); rtnl_lock(); - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -272,10 +274,10 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len) + int len, bool atomic) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -284,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, if (!ops) return -ENOENT; - mi = kzalloc(sizeof(*mi), GFP_KERNEL); + mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); if (!mi) { /*put back what find_ife_oplist took */ module_put(ops->owner); @@ -294,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, mi->metaid = metaid; mi->ops = ops; if (len > 0) { - ret = ops->alloc(mi, metaval); + ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); module_put(ops->owner); @@ -313,11 +315,13 @@ static int use_all_metadata(struct tcf_ife_info *ife) int rc = 0; int installed = 0; + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0); + rc = add_metainfo(ife, o->metaid, NULL, 0, true); if (rc == 0) installed += 1; } + read_unlock(&ife_mod_lock); if (installed) return 0; @@ -385,8 +389,9 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind) spin_unlock_bh(&ife->tcf_lock); } -/* under ife->tcf_lock */ -static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) +/* under ife->tcf_lock for existing action */ +static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, + bool exists) { int len = 0; int rc = 0; @@ -398,11 +403,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len); + rc = load_metaops_and_vet(ife, i, val, len, exists); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } @@ -474,7 +479,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, saddr = nla_data(tb[TCA_IFE_SMAC]); } - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; if (parm->flags & IFE_ENCODE) { @@ -504,11 +510,12 @@ metadata_parse_err: if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } - err = populate_metalist(ife, tb2); + err = populate_metalist(ife, tb2, exists); if (err) goto metadata_parse_err; @@ -523,12 +530,14 @@ metadata_parse_err: if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } } - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, a); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 9f002ad..d4bd19e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -121,10 +121,13 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) + if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } - if (!tcf_hash_check(tn, index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind, false); if (ret) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 2177eac..2e4bd2c 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -37,14 +37,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) { + unsigned int prev_backlog; + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); + prev_backlog = sch->qstats.backlog; /* queue full, remove one skb to fulfill the limit */ __qdisc_queue_drop_head(sch, &sch->q); qdisc_qstats_drop(sch); qdisc_enqueue_tail(skb, sch); + qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog); return NET_XMIT_CN; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d4b4218..62f9d81 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1007,7 +1007,9 @@ static void htb_work_func(struct work_struct *work) struct htb_sched *q = container_of(work, struct htb_sched, work); struct Qdisc *sch = q->watchdog.qdisc; + rcu_read_lock(); __netif_schedule(qdisc_root(sch)); + rcu_read_unlock(); } static int htb_init(struct Qdisc *sch, struct nlattr *opt) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 205bed0..178f163 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -650,14 +650,14 @@ deliver: #endif if (q->qdisc) { + unsigned int pkt_len = qdisc_pkt_len(skb); int err = qdisc_enqueue(skb, q->qdisc); - if (unlikely(err != NET_XMIT_SUCCESS)) { - if (net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, - qdisc_pkt_len(skb)); - } + if (err != NET_XMIT_SUCCESS && + net_xmit_drop_count(err)) { + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, + pkt_len); } goto tfifo_dequeue; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 4b0a821..a356450 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -172,8 +172,9 @@ prio_destroy(struct Qdisc *sch) static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *queues[TCQ_PRIO_BANDS]; + int oldbands = q->bands, i; struct tc_prio_qopt *qopt; - int i; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; @@ -187,62 +188,42 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } + /* Before commit, make sure we can allocate all new qdiscs */ + for (i = oldbands; i < qopt->bands; i++) { + queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); + if (!queues[i]) { + while (i > oldbands) + qdisc_destroy(queues[--i]); + return -ENOMEM; + } + } + sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < TCQ_PRIO_BANDS; i++) { + for (i = q->bands; i < oldbands; i++) { struct Qdisc *child = q->queues[i]; - q->queues[i] = &noop_qdisc; - if (child != &noop_qdisc) { - qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); - } - } - sch_tree_unlock(sch); - for (i = 0; i < q->bands; i++) { - if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child, *old; - - child = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, - TC_H_MAKE(sch->handle, i + 1)); - if (child) { - sch_tree_lock(sch); - old = q->queues[i]; - q->queues[i] = child; - - if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); - qdisc_destroy(old); - } - sch_tree_unlock(sch); - } - } + qdisc_tree_reduce_backlog(child, child->q.qlen, + child->qstats.backlog); + qdisc_destroy(child); } + + for (i = oldbands; i < q->bands; i++) + q->queues[i] = queues[i]; + + sch_tree_unlock(sch); return 0; } static int prio_init(struct Qdisc *sch, struct nlattr *opt) { - struct prio_sched_data *q = qdisc_priv(sch); - int i; - - for (i = 0; i < TCQ_PRIO_BANDS; i++) - q->queues[i] = &noop_qdisc; - - if (opt == NULL) { + if (!opt) return -EINVAL; - } else { - int err; - if ((err = prio_tune(sch, opt)) != 0) - return err; - } - return 0; + return prio_tune(sch, opt); } static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 1ce724b..f69edcf 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -3,12 +3,6 @@ #include <linux/sock_diag.h> #include <net/sctp/sctp.h> -extern void inet_diag_msg_common_fill(struct inet_diag_msg *r, - struct sock *sk); -extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); - static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6f11c62..bf8f05c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -405,7 +405,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, return 0; /* Send RESET message even if bearer is detached from device */ - tipc_ptr = rtnl_dereference(dev->tipc_ptr); + tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) goto drop; diff --git a/net/tipc/link.c b/net/tipc/link.c index 7059c94..67b6ab9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -704,7 +704,8 @@ static void link_profile_stats(struct tipc_link *l) */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { - int mtyp, rc = 0; + int mtyp = 0; + int rc = 0; bool state = false; bool probe = false; bool setup = false; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8740930..17201aa 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -41,6 +41,8 @@ #include "name_table.h" #define MAX_FORWARD_SIZE 1024 +#define BUF_HEADROOM (LL_MAX_HEADER + 48) +#define BUF_TAILROOM 16 static unsigned int align(unsigned int i) { @@ -505,6 +507,10 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) msg_set_hdr_sz(hdr, BASIC_H_SIZE); } + if (skb_cloned(_skb) && + pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL)) + goto exit; + /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); msg_set_origport(hdr, msg_destport(&ohdr)); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 024da8a..7cf52fb 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -94,17 +94,6 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 -/** - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). - * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM (LL_MAX_HEADER + 48) - struct tipc_skb_cb { void *handle; struct sk_buff *tail; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 88bfcd7..c49b8df 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -796,9 +796,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, * @tsk: receiving socket * @skb: pointer to message buffer. */ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct sock *sk = &tsk->sk; + u32 onode = tsk_own_node(tsk); struct tipc_msg *hdr = buf_msg(skb); int mtyp = msg_type(hdr); bool conn_cong; @@ -811,7 +813,8 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); - tipc_sk_respond(sk, skb, TIPC_OK); + if (tipc_msg_reverse(onode, &skb, TIPC_OK)) + __skb_queue_tail(xmitq, skb); return; } else if (mtyp == CONN_ACK) { conn_cong = tsk_conn_cong(tsk); @@ -1686,7 +1689,8 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) * * Returns true if message was added to socket receive queue, otherwise false */ -static bool filter_rcv(struct sock *sk, struct sk_buff *skb) +static bool filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); @@ -1696,7 +1700,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) int usr = msg_user(hdr); if (unlikely(msg_user(hdr) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb); + tipc_sk_proto_rcv(tsk, skb, xmitq); return false; } @@ -1739,7 +1743,8 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) return true; reject: - tipc_sk_respond(sk, skb, err); + if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err)) + __skb_queue_tail(xmitq, skb); return false; } @@ -1755,9 +1760,24 @@ reject: static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { unsigned int truesize = skb->truesize; + struct sk_buff_head xmitq; + u32 dnode, selector; - if (likely(filter_rcv(sk, skb))) + __skb_queue_head_init(&xmitq); + + if (likely(filter_rcv(sk, skb, &xmitq))) { atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); + return 0; + } + + if (skb_queue_empty(&xmitq)) + return 0; + + /* Send response/rejected message */ + skb = __skb_dequeue(&xmitq); + dnode = msg_destnode(buf_msg(skb)); + selector = msg_origport(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); return 0; } @@ -1771,12 +1791,13 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Caller must hold socket lock */ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, - u32 dport) + u32 dport, struct sk_buff_head *xmitq) { + unsigned long time_limit = jiffies + 2; + struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; - struct sk_buff *skb; - unsigned long time_limit = jiffies + 2; + u32 onode; while (skb_queue_len(inputq)) { if (unlikely(time_after_eq(jiffies, time_limit))) @@ -1788,7 +1809,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - filter_rcv(sk, skb); + filter_rcv(sk, skb, xmitq); continue; } @@ -1801,7 +1822,9 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, continue; /* Overload => reject message back to sender */ - tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD); + onode = tipc_own_addr(sock_net(sk)); + if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) + __skb_queue_tail(xmitq, skb); break; } } @@ -1814,12 +1837,14 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, */ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { + struct sk_buff_head xmitq; u32 dnode, dport = 0; int err; struct tipc_sock *tsk; struct sock *sk; struct sk_buff *skb; + __skb_queue_head_init(&xmitq); while (skb_queue_len(inputq)) { dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); @@ -1827,9 +1852,14 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) if (likely(tsk)) { sk = &tsk->sk; if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { - tipc_sk_enqueue(inputq, sk, dport); + tipc_sk_enqueue(inputq, sk, dport, &xmitq); spin_unlock_bh(&sk->sk_lock.slock); } + /* Send pending response/rejected messages, if any */ + while ((skb = __skb_dequeue(&xmitq))) { + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, dport); + } sock_put(sk); continue; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b5f1221..b96ac91 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -61,6 +61,14 @@ * function will also cleanup rejected sockets, those that reach the connected * state but leave it before they have been accepted. * + * - Lock ordering for pending or accept queue sockets is: + * + * lock_sock(listener); + * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); + * + * Using explicit nested locking keeps lockdep happy since normally only one + * lock of a given class may be taken at a time. + * * - Sockets created by user action will be cleaned up when the user process * calls close(2), causing our release implementation to be called. Our release * implementation will perform some cleanup then drop the last reference so our @@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work) cleanup = true; lock_sock(listener); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); @@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) if (connected) { listener->sk_ack_backlog--; - lock_sock(connected); + lock_sock_nested(connected, SINGLE_DEPTH_NESTING); vconnected = vsock_sk(connected); /* If the listener socket has received an error, then we should diff --git a/net/wireless/util.c b/net/wireless/util.c index 4e809e9..2443ee3 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -509,7 +509,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, * replace EtherType */ hdrlen += ETH_ALEN + 2; else - tmp.h_proto = htons(skb->len); + tmp.h_proto = htons(skb->len - hdrlen); pskb_pull(skb, hdrlen); |