summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c57
-rw-r--r--net/core/dev.c45
-rw-r--r--net/core/dst.c4
-rw-r--r--net/core/gen_estimator.c13
-rw-r--r--net/core/netclassid_cgroup.c3
-rw-r--r--net/core/pktgen.c12
-rw-r--r--net/core/request_sock.c8
-rw-r--r--net/core/rtnetlink.c198
-rw-r--r--net/core/skbuff.c39
-rw-r--r--net/core/sock.c8
10 files changed, 218 insertions, 169 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b80fb91..617088a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -131,6 +131,35 @@ out_noerr:
goto out;
}
+static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
+{
+ struct sk_buff *nskb;
+
+ if (skb->peeked)
+ return skb;
+
+ /* We have to unshare an skb before modifying it. */
+ if (!skb_shared(skb))
+ goto done;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return ERR_PTR(-ENOMEM);
+
+ skb->prev->next = nskb;
+ skb->next->prev = nskb;
+ nskb->prev = skb->prev;
+ nskb->next = skb->next;
+
+ consume_skb(skb);
+ skb = nskb;
+
+done:
+ skb->peeked = 1;
+
+ return skb;
+}
+
/**
* __skb_recv_datagram - Receive a datagram skbuff
* @sk: socket
@@ -165,7 +194,9 @@ out_noerr:
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
int *peeked, int *off, int *err)
{
+ struct sk_buff_head *queue = &sk->sk_receive_queue;
struct sk_buff *skb, *last;
+ unsigned long cpu_flags;
long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
@@ -184,8 +215,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
* Look at current nfs client by the way...
* However, this function was correct in any case. 8)
*/
- unsigned long cpu_flags;
- struct sk_buff_head *queue = &sk->sk_receive_queue;
int _off = *off;
last = (struct sk_buff *)queue;
@@ -199,7 +228,12 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
_off -= skb->len;
continue;
}
- skb->peeked = 1;
+
+ skb = skb_set_peeked(skb);
+ error = PTR_ERR(skb);
+ if (IS_ERR(skb))
+ goto unlock_err;
+
atomic_inc(&skb->users);
} else
__skb_unlink(skb, queue);
@@ -223,6 +257,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
return NULL;
+unlock_err:
+ spin_unlock_irqrestore(&queue->lock, cpu_flags);
no_packet:
*err = error;
return NULL;
@@ -622,7 +658,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
}
- skb->csum_valid = !sum;
+ if (!skb_shared(skb))
+ skb->csum_valid = !sum;
return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);
@@ -642,11 +679,13 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
netdev_rx_csum_fault(skb->dev);
}
- /* Save full packet checksum */
- skb->csum = csum;
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum_complete_sw = 1;
- skb->csum_valid = !sum;
+ if (!skb_shared(skb)) {
+ /* Save full packet checksum */
+ skb->csum = csum;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+ skb->csum_valid = !sum;
+ }
return sum;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 6778a99..a8e4dd4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -677,10 +677,6 @@ int dev_get_iflink(const struct net_device *dev)
if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
return dev->netdev_ops->ndo_get_iflink(dev);
- /* If dev->rtnl_link_ops is set, it's a virtual interface. */
- if (dev->rtnl_link_ops)
- return 0;
-
return dev->ifindex;
}
EXPORT_SYMBOL(dev_get_iflink);
@@ -3452,6 +3448,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
local_irq_save(flags);
rps_lock(sd);
+ if (!netif_running(skb->dev))
+ goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
if (qlen) {
@@ -3473,6 +3471,7 @@ enqueue:
goto enqueue;
}
+drop:
sd->dropped++;
rps_unlock(sd);
@@ -3775,8 +3774,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
pt_prev = NULL;
- rcu_read_lock();
-
another_round:
skb->skb_iif = skb->dev->ifindex;
@@ -3786,7 +3783,7 @@ another_round:
skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
skb = skb_vlan_untag(skb);
if (unlikely(!skb))
- goto unlock;
+ goto out;
}
#ifdef CONFIG_NET_CLS_ACT
@@ -3816,10 +3813,10 @@ skip_taps:
if (static_key_false(&ingress_needed)) {
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
- goto unlock;
+ goto out;
if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
- goto unlock;
+ goto out;
}
#endif
#ifdef CONFIG_NET_CLS_ACT
@@ -3837,7 +3834,7 @@ ncls:
if (vlan_do_receive(&skb))
goto another_round;
else if (unlikely(!skb))
- goto unlock;
+ goto out;
}
rx_handler = rcu_dereference(skb->dev->rx_handler);
@@ -3849,7 +3846,7 @@ ncls:
switch (rx_handler(&skb)) {
case RX_HANDLER_CONSUMED:
ret = NET_RX_SUCCESS;
- goto unlock;
+ goto out;
case RX_HANDLER_ANOTHER:
goto another_round;
case RX_HANDLER_EXACT:
@@ -3903,8 +3900,7 @@ drop:
ret = NET_RX_DROP;
}
-unlock:
- rcu_read_unlock();
+out:
return ret;
}
@@ -3935,29 +3931,30 @@ static int __netif_receive_skb(struct sk_buff *skb)
static int netif_receive_skb_internal(struct sk_buff *skb)
{
+ int ret;
+
net_timestamp_check(netdev_tstamp_prequeue, skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
+ rcu_read_lock();
+
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
- int cpu, ret;
-
- rcu_read_lock();
-
- cpu = get_rps_cpu(skb->dev, skb, &rflow);
+ int cpu = get_rps_cpu(skb->dev, skb, &rflow);
if (cpu >= 0) {
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
return ret;
}
- rcu_read_unlock();
}
#endif
- return __netif_receive_skb(skb);
+ ret = __netif_receive_skb(skb);
+ rcu_read_unlock();
+ return ret;
}
/**
@@ -4502,8 +4499,10 @@ static int process_backlog(struct napi_struct *napi, int quota)
struct sk_buff *skb;
while ((skb = __skb_dequeue(&sd->process_queue))) {
+ rcu_read_lock();
local_irq_enable();
__netif_receive_skb(skb);
+ rcu_read_unlock();
local_irq_disable();
input_queue_head_incr(sd);
if (++work >= quota) {
@@ -6139,6 +6138,7 @@ static void rollback_registered_many(struct list_head *head)
unlist_netdevice(dev);
dev->reg_state = NETREG_UNREGISTERING;
+ on_each_cpu(flush_backlog, dev, 1);
}
synchronize_net();
@@ -6409,7 +6409,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
struct netdev_queue *tx;
size_t sz = count * sizeof(*tx);
- BUG_ON(count < 1 || count > 0xffff);
+ if (count < 1 || count > 0xffff)
+ return -EINVAL;
tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
if (!tx) {
@@ -6773,8 +6774,6 @@ void netdev_run_todo(void)
dev->reg_state = NETREG_UNREGISTERED;
- on_each_cpu(flush_backlog, dev, 1);
-
netdev_wait_allrefs(dev);
/* paranoia */
diff --git a/net/core/dst.c b/net/core/dst.c
index e956ce6..002144be 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -284,7 +284,9 @@ void dst_release(struct dst_entry *dst)
int newrefcnt;
newrefcnt = atomic_dec_return(&dst->__refcnt);
- WARN_ON(newrefcnt < 0);
+ if (unlikely(newrefcnt < 0))
+ net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
+ __func__, dst, newrefcnt);
if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9dfb88a..92d886f 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -66,7 +66,7 @@
NOTES.
- * avbps is scaled by 2^5, avpps is scaled by 2^10.
+ * avbps and avpps are scaled by 2^5.
* both values are reported as 32 bit unsigned values. bps can
overflow for fast links : max speed being 34360Mbit/sec
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
@@ -85,10 +85,10 @@ struct gen_estimator
struct gnet_stats_rate_est64 *rate_est;
spinlock_t *stats_lock;
int ewma_log;
+ u32 last_packets;
+ unsigned long avpps;
u64 last_bytes;
u64 avbps;
- u32 last_packets;
- u32 avpps;
struct rcu_head e_rcu;
struct rb_node node;
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
@@ -118,8 +118,8 @@ static void est_timer(unsigned long arg)
rcu_read_lock();
list_for_each_entry_rcu(e, &elist[idx].list, list) {
struct gnet_stats_basic_packed b = {0};
+ unsigned long rate;
u64 brate;
- u32 rate;
spin_lock(e->stats_lock);
read_lock(&est_lock);
@@ -133,10 +133,11 @@ static void est_timer(unsigned long arg)
e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
e->rate_est->bps = (e->avbps+0xF)>>5;
- rate = (b.packets - e->last_packets)<<(12 - idx);
+ rate = b.packets - e->last_packets;
+ rate <<= (7 - idx);
e->last_packets = b.packets;
e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
- e->rate_est->pps = (e->avpps+0x1FF)>>10;
+ e->rate_est->pps = (e->avpps + 0xF) >> 5;
skip:
read_unlock(&est_lock);
spin_unlock(e->stats_lock);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 1f2a126..6441f47 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -23,7 +23,8 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state
struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
- return css_cls_state(task_css(p, net_cls_cgrp_id));
+ return css_cls_state(task_css_check(p, net_cls_cgrp_id,
+ rcu_read_lock_bh_held()));
}
EXPORT_SYMBOL_GPL(task_cls_state);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 05badbb..1cbd209 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3514,8 +3514,6 @@ static int pktgen_thread_worker(void *arg)
set_freezable();
- __set_current_state(TASK_RUNNING);
-
while (!kthread_should_stop()) {
pkt_dev = next_to_run(t);
@@ -3560,7 +3558,6 @@ static int pktgen_thread_worker(void *arg)
try_to_freeze();
}
- set_current_state(TASK_INTERRUPTIBLE);
pr_debug("%s stopping all device\n", t->tsk->comm);
pktgen_stop(t);
@@ -3571,13 +3568,6 @@ static int pktgen_thread_worker(void *arg)
pr_debug("%s removing thread\n", t->tsk->comm);
pktgen_rem_thread(t);
- /* Wait for kthread_stop */
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- }
- __set_current_state(TASK_RUNNING);
-
return 0;
}
@@ -3769,6 +3759,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
}
t->net = pn;
+ get_task_struct(p);
wake_up_process(p);
wait_for_completion(&t->start_done);
@@ -3891,6 +3882,7 @@ static void __net_exit pg_net_exit(struct net *net)
t = list_entry(q, struct pktgen_thread, th_list);
list_del(&t->th_list);
kthread_stop(t->tsk);
+ put_task_struct(t->tsk);
kfree(t);
}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 87b22c0..b42f0e2 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -103,10 +103,16 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
spin_lock_bh(&queue->syn_wait_lock);
while ((req = lopt->syn_table[i]) != NULL) {
lopt->syn_table[i] = req->dl_next;
+ /* Because of following del_timer_sync(),
+ * we must release the spinlock here
+ * or risk a dead lock.
+ */
+ spin_unlock_bh(&queue->syn_wait_lock);
atomic_inc(&lopt->qlen_dec);
- if (del_timer(&req->rsk_timer))
+ if (del_timer_sync(&req->rsk_timer))
reqsk_put(req);
reqsk_put(req);
+ spin_lock_bh(&queue->syn_wait_lock);
}
spin_unlock_bh(&queue->syn_wait_lock);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 01ced4a..dc004b1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1328,10 +1328,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
[IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED },
};
-static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
- [IFLA_VF_INFO] = { .type = NLA_NESTED },
-};
-
static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) },
[IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) },
@@ -1488,96 +1484,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
return 0;
}
-static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
{
- int rem, err = -EINVAL;
- struct nlattr *vf;
const struct net_device_ops *ops = dev->netdev_ops;
+ int err = -EINVAL;
- nla_for_each_nested(vf, attr, rem) {
- switch (nla_type(vf)) {
- case IFLA_VF_MAC: {
- struct ifla_vf_mac *ivm;
- ivm = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_mac)
- err = ops->ndo_set_vf_mac(dev, ivm->vf,
- ivm->mac);
- break;
- }
- case IFLA_VF_VLAN: {
- struct ifla_vf_vlan *ivv;
- ivv = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_vlan)
- err = ops->ndo_set_vf_vlan(dev, ivv->vf,
- ivv->vlan,
- ivv->qos);
- break;
- }
- case IFLA_VF_TX_RATE: {
- struct ifla_vf_tx_rate *ivt;
- struct ifla_vf_info ivf;
- ivt = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_get_vf_config)
- err = ops->ndo_get_vf_config(dev, ivt->vf,
- &ivf);
- if (err)
- break;
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivf.min_tx_rate,
- ivt->rate);
- break;
- }
- case IFLA_VF_RATE: {
- struct ifla_vf_rate *ivt;
- ivt = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivt->min_tx_rate,
- ivt->max_tx_rate);
- break;
- }
- case IFLA_VF_SPOOFCHK: {
- struct ifla_vf_spoofchk *ivs;
- ivs = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_spoofchk)
- err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
- ivs->setting);
- break;
- }
- case IFLA_VF_LINK_STATE: {
- struct ifla_vf_link_state *ivl;
- ivl = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_link_state)
- err = ops->ndo_set_vf_link_state(dev, ivl->vf,
- ivl->link_state);
- break;
- }
- case IFLA_VF_RSS_QUERY_EN: {
- struct ifla_vf_rss_query_en *ivrssq_en;
+ if (tb[IFLA_VF_MAC]) {
+ struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
- ivrssq_en = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rss_query_en)
- err = ops->ndo_set_vf_rss_query_en(dev,
- ivrssq_en->vf,
- ivrssq_en->setting);
- break;
- }
- default:
- err = -EINVAL;
- break;
- }
- if (err)
- break;
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_mac)
+ err = ops->ndo_set_vf_mac(dev, ivm->vf,
+ ivm->mac);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_VLAN]) {
+ struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_vlan)
+ err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
+ ivv->qos);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_TX_RATE]) {
+ struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
+ struct ifla_vf_info ivf;
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_get_vf_config)
+ err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
+ if (err < 0)
+ return err;
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivf.min_tx_rate,
+ ivt->rate);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_RATE]) {
+ struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivt->min_tx_rate,
+ ivt->max_tx_rate);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_SPOOFCHK]) {
+ struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_spoofchk)
+ err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
+ ivs->setting);
+ if (err < 0)
+ return err;
}
+
+ if (tb[IFLA_VF_LINK_STATE]) {
+ struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_link_state)
+ err = ops->ndo_set_vf_link_state(dev, ivl->vf,
+ ivl->link_state);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_RSS_QUERY_EN]) {
+ struct ifla_vf_rss_query_en *ivrssq_en;
+
+ err = -EOPNOTSUPP;
+ ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
+ if (ops->ndo_set_vf_rss_query_en)
+ err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
+ ivrssq_en->setting);
+ if (err < 0)
+ return err;
+ }
+
return err;
}
@@ -1773,14 +1771,21 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_VFINFO_LIST]) {
+ struct nlattr *vfinfo[IFLA_VF_MAX + 1];
struct nlattr *attr;
int rem;
+
nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
- if (nla_type(attr) != IFLA_VF_INFO) {
+ if (nla_type(attr) != IFLA_VF_INFO ||
+ nla_len(attr) < NLA_HDRLEN) {
err = -EINVAL;
goto errout;
}
- err = do_setvfinfo(dev, attr);
+ err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
+ ifla_vf_policy);
+ if (err < 0)
+ goto errout;
+ err = do_setvfinfo(dev, vfinfo);
if (err < 0)
goto errout;
status |= DO_SETLINK_NOTIFY;
@@ -1799,10 +1804,13 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
- if (nla_type(attr) != IFLA_VF_PORT)
- continue;
- err = nla_parse_nested(port, IFLA_PORT_MAX,
- attr, ifla_port_policy);
+ if (nla_type(attr) != IFLA_VF_PORT ||
+ nla_len(attr) < NLA_HDRLEN) {
+ err = -EINVAL;
+ goto errout;
+ }
+ err = nla_parse_nested(port, IFLA_PORT_MAX, attr,
+ ifla_port_policy);
if (err < 0)
goto errout;
if (!port[IFLA_PORT_VF]) {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b6a19ca..7b84330 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -340,7 +340,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
if (skb && frag_size) {
skb->head_frag = 1;
- if (virt_to_head_page(data)->pfmemalloc)
+ if (page_is_pfmemalloc(virt_to_head_page(data)))
skb->pfmemalloc = 1;
}
return skb;
@@ -4022,8 +4022,8 @@ EXPORT_SYMBOL(skb_checksum_setup);
* Otherwise returns the provided skb. Returns NULL in error cases
* (e.g. transport_len exceeds skb length or out-of-memory).
*
- * Caller needs to set the skb transport header and release the returned skb.
- * Provided skb is consumed.
+ * Caller needs to set the skb transport header and free any returned skb if it
+ * differs from the provided skb.
*/
static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
unsigned int transport_len)
@@ -4032,16 +4032,12 @@ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
unsigned int len = skb_transport_offset(skb) + transport_len;
int ret;
- if (skb->len < len) {
- kfree_skb(skb);
+ if (skb->len < len)
return NULL;
- } else if (skb->len == len) {
+ else if (skb->len == len)
return skb;
- }
skb_chk = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(skb);
-
if (!skb_chk)
return NULL;
@@ -4066,8 +4062,8 @@ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
* If the skb has data beyond the given transport length, then a
* trimmed & cloned skb is checked and returned.
*
- * Caller needs to set the skb transport header and release the returned skb.
- * Provided skb is consumed.
+ * Caller needs to set the skb transport header and free any returned skb if it
+ * differs from the provided skb.
*/
struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
unsigned int transport_len,
@@ -4079,23 +4075,26 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
skb_chk = skb_checksum_maybe_trim(skb, transport_len);
if (!skb_chk)
- return NULL;
+ goto err;
- if (!pskb_may_pull(skb_chk, offset)) {
- kfree_skb(skb_chk);
- return NULL;
- }
+ if (!pskb_may_pull(skb_chk, offset))
+ goto err;
__skb_pull(skb_chk, offset);
ret = skb_chkf(skb_chk);
__skb_push(skb_chk, offset);
- if (ret) {
- kfree_skb(skb_chk);
- return NULL;
- }
+ if (ret)
+ goto err;
return skb_chk;
+
+err:
+ if (skb_chk && skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ return NULL;
+
}
EXPORT_SYMBOL(skb_checksum_trimmed);
diff --git a/net/core/sock.c b/net/core/sock.c
index 08f16db..193901d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1497,7 +1497,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sock_copy(newsk, sk);
/* SANITY */
- get_net(sock_net(newsk));
+ if (likely(newsk->sk_net_refcnt))
+ get_net(sock_net(newsk));
sk_node_init(&newsk->sk_node);
sock_lock_init(newsk);
bh_lock_sock(newsk);
@@ -1967,20 +1968,21 @@ static void __release_sock(struct sock *sk)
* sk_wait_data - wait for data to arrive at sk_receive_queue
* @sk: sock to wait on
* @timeo: for how long
+ * @skb: last skb seen on sk_receive_queue
*
* Now socket state including sk->sk_err is changed only under lock,
* hence we may omit checks after joining wait queue.
* We check receive queue before schedule() only as optimization;
* it is very likely that release_sock() added new data.
*/
-int sk_wait_data(struct sock *sk, long *timeo)
+int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
int rc;
DEFINE_WAIT(wait);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
- rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
+ rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
finish_wait(sk_sleep(sk), &wait);
return rc;