Diffstat (limited to 'net/core')
-rw-r--r--   net/core/datagram.c       |  6
-rw-r--r--   net/core/dev.c            | 81
-rw-r--r--   net/core/drop_monitor.c   | 12
-rw-r--r--   net/core/gen_estimator.c  | 15
-rw-r--r--   net/core/neighbour.c      |  1
-rw-r--r--   net/core/pktgen.c         |  2
-rw-r--r--   net/core/rtnetlink.c      | 30
-rw-r--r--   net/core/skbuff.c         | 81
-rw-r--r--   net/core/sock.c           | 52
9 files changed, 200 insertions, 80 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e009753..f5b6f43 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
+	bool slow;
+
 	if (likely(atomic_read(&skb->users) == 1))
 		smp_rmb();
 	else if (likely(!atomic_dec_and_test(&skb->users)))
 		return;
 
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	skb_orphan(skb);
 	sk_mem_reclaim_partial(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	/* skb is now orphaned, can be freed outside of locked section */
 	__kfree_skb(skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index d273e4e..2b3bf53 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -954,18 +954,22 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
-static int dev_get_valid_name(struct net *net, const char *name, char *buf,
-			      bool fmt)
+static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
 {
+	struct net *net;
+
+	BUG_ON(!dev_net(dev));
+	net = dev_net(dev);
+
 	if (!dev_valid_name(name))
 		return -EINVAL;
 
 	if (fmt && strchr(name, '%'))
-		return __dev_alloc_name(net, name, buf);
+		return dev_alloc_name(dev, name);
 	else if (__dev_get_by_name(net, name))
 		return -EEXIST;
-	else if (buf != name)
-		strlcpy(buf, name, IFNAMSIZ);
+	else if (dev->name != name)
+		strlcpy(dev->name, name, IFNAMSIZ);
 
 	return 0;
 }
@@ -997,7 +1001,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	memcpy(oldname, dev->name, IFNAMSIZ);
 
-	err = dev_get_valid_name(net, newname, dev->name, 1);
+	err = dev_get_valid_name(dev, newname, 1);
 	if (err < 0)
 		return err;
 
@@ -2249,11 +2253,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	if (skb_rx_queue_recorded(skb)) {
 		u16 index = skb_get_rx_queue(skb);
 		if (unlikely(index >= dev->num_rx_queues)) {
-			if (net_ratelimit()) {
-				pr_warning("%s received packet on queue "
-					"%u, but number of RX queues is %u\n",
-					dev->name, index, dev->num_rx_queues);
-			}
+			WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
+				  "on queue %u, but number of RX queues is %u\n",
+				  dev->name, index, dev->num_rx_queues);
 			goto done;
 		}
 		rxqueue = dev->_rx + index;
@@ -2421,10 +2423,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	if (skb_queue_len(&sd->input_pkt_queue)) {
 enqueue:
 		__skb_queue_tail(&sd->input_pkt_queue, skb);
-#ifdef CONFIG_RPS
-		*qtail = sd->input_queue_head +
-				skb_queue_len(&sd->input_pkt_queue);
-#endif
+		input_queue_tail_incr_save(sd, qtail);
 		rps_unlock(sd);
 		local_irq_restore(flags);
 		return NET_RX_SUCCESS;
@@ -2794,7 +2793,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
-	struct net_device *null_or_bond;
+	struct net_device *orig_or_bond;
 	int ret = NET_RX_DROP;
 	__be16 type;
 
@@ -2811,13 +2810,24 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	if (!skb->skb_iif)
 		skb->skb_iif = skb->dev->ifindex;
 
+	/*
+	 * bonding note: skbs received on inactive slaves should only
+	 * be delivered to pkt handlers that are exact matches. Also
+	 * the deliver_no_wcard flag will be set. If packet handlers
+	 * are sensitive to duplicate packets these skbs will need to
+	 * be dropped at the handler. The vlan accel path may have
+	 * already set the deliver_no_wcard flag.
+	 */
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
 	master = ACCESS_ONCE(orig_dev->master);
-	if (master) {
-		if (skb_bond_should_drop(skb, master))
+	if (skb->deliver_no_wcard)
+		null_or_orig = orig_dev;
+	else if (master) {
+		if (skb_bond_should_drop(skb, master)) {
+			skb->deliver_no_wcard = 1;
 			null_or_orig = orig_dev; /* deliver only exact match */
-		else
+		} else
 			skb->dev = master;
 	}
 
@@ -2867,10 +2877,10 @@ ncls:
 	 * device that may have registered for a specific ptype. The
 	 * handler may have to adjust skb->dev and orig_dev.
 	 */
-	null_or_bond = NULL;
+	orig_or_bond = orig_dev;
 	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
 	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
-		null_or_bond = vlan_dev_real_dev(skb->dev);
+		orig_or_bond = vlan_dev_real_dev(skb->dev);
 	}
 
 	type = skb->protocol;
@@ -2878,7 +2888,7 @@ ncls:
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type && (ptype->dev == null_or_orig ||
 		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
-		     ptype->dev == null_or_bond)) {
+		     ptype->dev == orig_or_bond)) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -2959,7 +2969,7 @@ static void flush_backlog(void *arg)
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
-			input_queue_head_add(sd, 1);
+			input_queue_head_incr(sd);
 		}
 	}
 	rps_unlock(sd);
@@ -2968,6 +2978,7 @@ static void flush_backlog(void *arg)
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &sd->process_queue);
 			kfree_skb(skb);
+			input_queue_head_incr(sd);
 		}
 	}
 }
@@ -3323,18 +3334,20 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		while ((skb = __skb_dequeue(&sd->process_queue))) {
 			local_irq_enable();
 			__netif_receive_skb(skb);
-			if (++work >= quota)
-				return work;
 			local_irq_disable();
+			input_queue_head_incr(sd);
+			if (++work >= quota) {
+				local_irq_enable();
+				return work;
+			}
 		}
 
 		rps_lock(sd);
 		qlen = skb_queue_len(&sd->input_pkt_queue);
-		if (qlen) {
-			input_queue_head_add(sd, qlen);
+		if (qlen)
 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
 						   &sd->process_queue);
-		}
+
 		if (qlen < quota - work) {
 			/*
 			 * Inline a custom version of __napi_complete().
@@ -4960,7 +4973,7 @@ int register_netdevice(struct net_device *dev)
 		}
 	}
 
-	ret = dev_get_valid_name(net, dev->name, dev->name, 0);
+	ret = dev_get_valid_name(dev, dev->name, 0);
 	if (ret)
 		goto err_uninit;
 
@@ -5558,7 +5571,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 		/* We get here if we can't use the current device name */
 		if (!pat)
 			goto out;
-		if (dev_get_valid_name(net, pat, dev->name, 1))
+		if (dev_get_valid_name(dev, pat, 1))
 			goto out;
 	}
 
@@ -5661,12 +5674,14 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	local_irq_enable();
 
 	/* Process offline CPU's input_pkt_queue */
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
 		netif_rx(skb);
-		input_queue_head_add(oldsd, 1);
+		input_queue_head_incr(oldsd);
 	}
-	while ((skb = __skb_dequeue(&oldsd->process_queue)))
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
+		input_queue_head_incr(oldsd);
+	}
 
 	return NOTIFY_OK;
 }
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index cf208d8..ad41529 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -172,12 +172,12 @@ out:
 	return;
 }
 
-static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
 {
 	trace_drop_common(skb, location);
 }
 
-static void trace_napi_poll_hit(struct napi_struct *napi)
+static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
 {
 	struct dm_hw_stat_delta *new_stat;
 
@@ -225,12 +225,12 @@ static int set_all_monitor_traces(int state)
 
 	switch (state) {
 	case TRACE_ON:
-		rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
-		rc |= register_trace_napi_poll(trace_napi_poll_hit);
+		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+		rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
 		break;
 	case TRACE_OFF:
-		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
-		rc |= unregister_trace_napi_poll(trace_napi_poll_hit);
+		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+		rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
 
 		tracepoint_synchronize_unregister();
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index cf8e703..785e527 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -107,6 +107,7 @@ static DEFINE_RWLOCK(est_lock);
 
 /* Protects against soft lockup during large deletion */
 static struct rb_root est_root = RB_ROOT;
+static DEFINE_SPINLOCK(est_tree_lock);
 
 static void est_timer(unsigned long arg)
 {
@@ -201,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  *
  * Returns 0 on success or a negative error code.
  *
- * NOTE: Called under rtnl_mutex
  */
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_rate_est *rate_est,
@@ -232,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 	est->last_packets = bstats->packets;
 	est->avpps = rate_est->pps<<10;
 
+	spin_lock(&est_tree_lock);
 	if (!elist[idx].timer.function) {
 		INIT_LIST_HEAD(&elist[idx].list);
 		setup_timer(&elist[idx].timer, est_timer, idx);
@@ -242,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 
 	list_add_rcu(&est->list, &elist[idx].list);
 	gen_add_node(est);
+	spin_unlock(&est_tree_lock);
 
 	return 0;
 }
@@ -261,13 +263,13 @@ static void __gen_kill_estimator(struct rcu_head *head)
  *
  * Removes the rate estimator specified by &bstats and &rate_est.
  *
- * NOTE: Called under rtnl_mutex
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est *rate_est)
 {
 	struct gen_estimator *e;
 
+	spin_lock(&est_tree_lock);
 	while ((e = gen_find_node(bstats, rate_est))) {
 		rb_erase(&e->node, &est_root);
 
@@ -278,6 +280,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 
 		list_del_rcu(&e->list);
 		call_rcu(&e->e_rcu, __gen_kill_estimator);
 	}
+	spin_unlock(&est_tree_lock);
 }
 EXPORT_SYMBOL(gen_kill_estimator);
@@ -312,8 +315,14 @@ EXPORT_SYMBOL(gen_replace_estimator);
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
 			  const struct gnet_stats_rate_est *rate_est)
 {
+	bool res;
+
 	ASSERT_RTNL();
 
-	return gen_find_node(bstats, rate_est) != NULL;
+	spin_lock(&est_tree_lock);
+	res = gen_find_node(bstats, rate_est) != NULL;
+	spin_unlock(&est_tree_lock);
+
+	return res;
 }
 EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bff3790..6ba1c0e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -934,6 +934,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 				kfree_skb(buff);
 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
 			}
+			skb_dst_force(skb);
 			__skb_queue_tail(&neigh->arp_queue, skb);
 		}
 		rc = 1;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2ad68da..1dacd7b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2170,7 +2170,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 	end_time = ktime_now();
 
 	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
-	pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay);
+	pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
 }
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e4b9870..1a2af24 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -650,11 +650,12 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
 	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
 
 		int num_vfs = dev_num_vf(dev->dev.parent);
-		size_t size = nlmsg_total_size(sizeof(struct nlattr));
-		size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
-		size += num_vfs * (sizeof(struct ifla_vf_mac) +
-				   sizeof(struct ifla_vf_vlan) +
-				   sizeof(struct ifla_vf_tx_rate));
+		size_t size = nla_total_size(sizeof(struct nlattr));
+		size += nla_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs *
+			(nla_total_size(sizeof(struct ifla_vf_mac)) +
+			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
+			 nla_total_size(sizeof(struct ifla_vf_tx_rate)));
 		return size;
 	} else
 		return 0;
@@ -722,14 +723,13 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
 
 	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
 		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
-		if (!vf_port) {
-			nla_nest_cancel(skb, vf_ports);
-			return -EMSGSIZE;
-		}
+		if (!vf_port)
+			goto nla_put_failure;
 		NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
 		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+		if (err == -EMSGSIZE)
+			goto nla_put_failure;
 		if (err) {
-nla_put_failure:
 			nla_nest_cancel(skb, vf_port);
 			continue;
 		}
@@ -739,6 +739,10 @@ nla_put_failure:
 	nla_nest_end(skb, vf_ports);
 
 	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, vf_ports);
+	return -EMSGSIZE;
 }
 
 static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
@@ -753,7 +757,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
 	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
 	if (err) {
 		nla_nest_cancel(skb, port_self);
-		return err;
+		return (err == -EMSGSIZE) ? err : 0;
 	}
 
 	nla_nest_end(skb, port_self);
@@ -1199,8 +1203,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		struct nlattr *attr;
 		int rem;
 		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
-			if (nla_type(attr) != IFLA_VF_INFO)
+			if (nla_type(attr) != IFLA_VF_INFO) {
+				err = -EINVAL;
 				goto errout;
+			}
 			err = do_setvfinfo(dev, attr);
 			if (err < 0)
 				goto errout;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c543dd2..9f07e74 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -482,22 +482,22 @@ EXPORT_SYMBOL(consume_skb);
  * reference count dropping and cleans up the skbuff as if it
  * just came from __alloc_skb().
  */
-int skb_recycle_check(struct sk_buff *skb, int skb_size)
+bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 {
 	struct skb_shared_info *shinfo;
 
 	if (irqs_disabled())
-		return 0;
+		return false;
 
 	if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
-		return 0;
+		return false;
 
 	skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
 	if (skb_end_pointer(skb) - skb->head < skb_size)
-		return 0;
+		return false;
 
 	if (skb_shared(skb) || skb_cloned(skb))
-		return 0;
+		return false;
 
 	skb_release_head_state(skb);
 
@@ -509,7 +509,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
 	skb->data = skb->head + NET_SKB_PAD;
 	skb_reset_tail_pointer(skb);
 
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL(skb_recycle_check);
 
@@ -1406,12 +1406,13 @@ new_page:
 /*
  * Fill page/offset/length into spd, if it can hold more pages.
  */
-static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+static inline int spd_fill_page(struct splice_pipe_desc *spd,
+				struct pipe_inode_info *pipe, struct page *page,
 				unsigned int *len, unsigned int offset,
 				struct sk_buff *skb, int linear,
 				struct sock *sk)
 {
-	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+	if (unlikely(spd->nr_pages == pipe->buffers))
 		return 1;
 
 	if (linear) {
@@ -1447,7 +1448,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
 				   struct splice_pipe_desc *spd, int linear,
-				   struct sock *sk)
+				   struct sock *sk,
+				   struct pipe_inode_info *pipe)
 {
 	if (!*len)
 		return 1;
@@ -1470,7 +1472,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 		/* the linear region may spread across several pages */
 		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
-		if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk))
+		if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
 			return 1;
 
 		__segment_seek(&page, &poff, &plen, flen);
@@ -1485,9 +1487,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
  * Map linear and fragment data from the skb to spd. It reports failure if the
  * pipe is full or if we already spliced the requested length.
  */
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-			     unsigned int *len, struct splice_pipe_desc *spd,
-			     struct sock *sk)
+static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+			     unsigned int *offset, unsigned int *len,
+			     struct splice_pipe_desc *spd, struct sock *sk)
 {
 	int seg;
 
@@ -1497,7 +1499,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1, sk))
+			     offset, len, skb, spd, 1, sk, pipe))
 		return 1;
 
 	/*
@@ -1507,7 +1509,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd, 0, sk))
+				     offset, len, skb, spd, 0, sk, pipe))
 			return 1;
 	}
 
@@ -1524,8 +1526,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int tlen,
 		    unsigned int flags)
 {
-	struct partial_page partial[PIPE_BUFFERS];
-	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
@@ -1535,12 +1537,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 	};
 	struct sk_buff *frag_iter;
 	struct sock *sk = skb->sk;
+	int ret = 0;
+
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
 
 	/*
 	 * __skb_splice_bits() only fails if the output has no room left,
 	 * so no point in going over the frag_list for the error case.
 	 */
-	if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk))
+	if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
 		goto done;
 	else if (!tlen)
 		goto done;
@@ -1551,14 +1557,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 	skb_walk_frags(skb, frag_iter) {
 		if (!tlen)
 			break;
-		if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk))
+		if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
 			break;
 	}
 
 done:
 	if (spd.nr_pages) {
-		int ret;
-
 		/*
 		 * Drop the socket lock, otherwise we have reverse
 		 * locking dependencies between sk_lock and i_mutex
@@ -1571,10 +1575,10 @@ done:
 		release_sock(sk);
 		ret = splice_to_pipe(pipe, &spd);
 		lock_sock(sk);
-		return ret;
 	}
 
-	return 0;
+	splice_shrink_spd(pipe, &spd);
+	return ret;
 }
 
 /**
@@ -2718,6 +2722,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 		*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
 		skb_shinfo(nskb)->frag_list = p;
 		skb_shinfo(nskb)->gso_size = pinfo->gso_size;
+		pinfo->gso_size = 0;
 		skb_header_release(p);
 		nskb->prev = p;
 
@@ -2960,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 }
 EXPORT_SYMBOL_GPL(skb_cow_data);
 
+static void sock_rmem_free(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+}
+
+/*
+ * Note: We dont mem charge error packets (no sk_forward_alloc changes)
+ */
+int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+	    (unsigned)sk->sk_rcvbuf)
+		return -ENOMEM;
+
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_rmem_free;
+	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+	skb_queue_tail(&sk->sk_error_queue, skb);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb->len);
+	return 0;
+}
+EXPORT_SYMBOL(sock_queue_err_skb);
+
 void skb_tstamp_tx(struct sk_buff *orig_skb,
 		struct skb_shared_hwtstamps *hwtstamps)
 {
@@ -2991,7 +3024,9 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	memset(serr, 0, sizeof(*serr));
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+
 	err = sock_queue_err_skb(sk, skb);
+
 	if (err)
 		kfree_skb(skb);
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index bf88a16..2cf7f9f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -123,6 +123,7 @@
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;
@@ -1050,6 +1056,17 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
 	module_put(owner);
 }
 
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+	u32 classid = task_cls_classid(current);
+
+	if (classid && classid != sk->sk_classid)
+		sk->sk_classid = classid;
+}
+EXPORT_SYMBOL(sock_update_classid);
+#endif
+
 /**
  *	sk_alloc - All socket objects are allocated here
  *	@net: the applicable net namespace
@@ -1073,6 +1090,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sock_lock_init(sk);
 		sock_net_set(sk, get_net(net));
 		atomic_set(&sk->sk_wmem_alloc, 1);
+
+		sock_update_classid(sk);
 	}
 
 	return sk;
@@ -1988,6 +2007,39 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);
 
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ * return true if slow path is taken
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+	might_sleep();
+	spin_lock_bh(&sk->sk_lock.slock);
+
+	if (!sk->sk_lock.owned)
+		/*
+		 * Note : We must disable BH
+		 */
+		return false;
+
+	__lock_sock(sk);
+	sk->sk_lock.owned = 1;
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
+	return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
+
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
 	struct timeval tv;
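Usage sketch (not part of the commit): the lock_sock_fast()/unlock_sock_fast() pair added in net/core/sock.c is intended to bracket very short critical sections, which is how skb_free_datagram_locked() in net/core/datagram.c uses it above. The caller below, example_reclaim(), is a hypothetical illustration of the calling convention only, assuming the declarations from include/net/sock.h.

	#include <net/sock.h>

	/* Hypothetical caller: keep the section between the two calls small.
	 * lock_sock_fast() returns false when it only took sk_lock.slock
	 * (fast path, BH disabled) and true when it fell back to the full
	 * socket lock; that value must be handed back to unlock_sock_fast().
	 */
	static void example_reclaim(struct sock *sk)
	{
		bool slow = lock_sock_fast(sk);

		sk_mem_reclaim_partial(sk);

		unlock_sock_fast(sk, slow);
	}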