-rw-r--r--   net/ipv4/tcp_input.c   79
1 file changed, 62 insertions, 17 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 37e1c5c..bd7aef5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4449,6 +4449,58 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
         return 0;
 }
 
+/**
+ * tcp_try_coalesce - try to merge skb to prior one
+ * @sk: socket
+ * @to: prior buffer
+ * @from: buffer to add in queue
+ *
+ * Before queueing skb @from after @to, try to merge them
+ * to reduce overall memory use and queue lengths, if cost is small.
+ * Packets in ofo or receive queues can stay a long time.
+ * Better try to coalesce them right now to avoid future collapses.
+ * Returns > 0 value if caller should free @from instead of queueing it
+ */
+static int tcp_try_coalesce(struct sock *sk,
+                            struct sk_buff *to,
+                            struct sk_buff *from)
+{
+        int len = from->len;
+
+        if (tcp_hdr(from)->fin)
+                return 0;
+        if (len <= skb_tailroom(to)) {
+                BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
+merge:
+                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
+                TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
+                TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
+                return 1;
+        }
+        if (skb_headlen(from) == 0 &&
+            !skb_has_frag_list(to) &&
+            !skb_has_frag_list(from) &&
+            (skb_shinfo(to)->nr_frags +
+             skb_shinfo(from)->nr_frags <= MAX_SKB_FRAGS)) {
+                int delta = from->truesize - ksize(from->head) -
+                            SKB_DATA_ALIGN(sizeof(struct sk_buff));
+
+                WARN_ON_ONCE(delta < len);
+                memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
+                       skb_shinfo(from)->frags,
+                       skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
+                skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+                skb_shinfo(from)->nr_frags = 0;
+                to->truesize += delta;
+                atomic_add(delta, &sk->sk_rmem_alloc);
+                sk_mem_charge(sk, delta);
+                to->len += len;
+                to->data_len += len;
+                goto merge;
+        }
+        return 0;
+}
+
 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 {
         struct tcp_sock *tp = tcp_sk(sk);
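
The fragment-stealing branch of the new helper transfers memory accounting along with the page fragments: delta is computed as @from->truesize minus the head allocation (ksize(from->head)) and the aligned struct sk_buff, in effect the share of truesize that backs the paged data, and exactly that amount is added to @to->truesize, to sk_rmem_alloc and to the socket's memory charge. A small standalone illustration of that arithmetic follows; the 256/256/4096 byte figures are made up for the example, not taken from the patch.

#include <stdio.h>

int main(void)
{
        unsigned int head_alloc = 256;  /* hypothetical ksize(from->head) */
        unsigned int skb_struct = 256;  /* hypothetical SKB_DATA_ALIGN(sizeof(struct sk_buff)) */
        unsigned int paged      = 4096; /* payload held in page fragments */
        unsigned int truesize   = head_alloc + skb_struct + paged;

        /* Only the paged share migrates to the coalesced skb; the head
         * buffer and the sk_buff itself are released when the caller
         * frees 'from'. */
        unsigned int delta = truesize - head_alloc - skb_struct;

        printf("delta = %u bytes re-charged to the socket\n", delta);
        return 0;
}
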
@@ -4487,23 +4539,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
         end_seq = TCP_SKB_CB(skb)->end_seq;
 
         if (seq == TCP_SKB_CB(skb1)->end_seq) {
-                /* Packets in ofo can stay in queue a long time.
-                 * Better try to coalesce them right now
-                 * to avoid future tcp_collapse_ofo_queue(),
-                 * probably the most expensive function in tcp stack.
-                 */
-                if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
-                        NET_INC_STATS_BH(sock_net(sk),
-                                         LINUX_MIB_TCPRCVCOALESCE);
-                        BUG_ON(skb_copy_bits(skb, 0,
-                                             skb_put(skb1, skb->len),
-                                             skb->len));
-                        TCP_SKB_CB(skb1)->end_seq = end_seq;
-                        TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+                if (tcp_try_coalesce(sk, skb1, skb) <= 0) {
+                        __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+                } else {
                         __kfree_skb(skb);
                         skb = NULL;
-                } else {
-                        __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
                 }
 
                 if (!tp->rx_opt.num_sacks ||
@@ -4624,13 +4664,18 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
         }
 
         if (eaten <= 0) {
+                struct sk_buff *tail;
 queue_and_out:
                 if (eaten < 0 &&
                     tcp_try_rmem_schedule(sk, skb->truesize))
                         goto drop;
-                skb_set_owner_r(skb, sk);
-                __skb_queue_tail(&sk->sk_receive_queue, skb);
+
+                tail = skb_peek_tail(&sk->sk_receive_queue);
+                eaten = tail ? tcp_try_coalesce(sk, tail, skb) : -1;
+                if (eaten <= 0) {
+                        skb_set_owner_r(skb, sk);
+                        __skb_queue_tail(&sk->sk_receive_queue, skb);
+                }
         }
         tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
         if (skb->len)
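
Both call sites follow the contract stated in the kernel-doc above: a return value greater than zero means the payload has been absorbed by the previous buffer, so the caller frees the incoming skb (tcp_data_queue() also reuses the value as 'eaten'), while zero means the segment is queued as before. Below is a simplified, self-contained userspace model of that helper-plus-caller pattern; struct buf, MAX_FRAGS, try_coalesce() and receive() are hypothetical stand-ins sketched for illustration, not the kernel's sk_buff API.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_FRAGS 17                    /* stand-in for MAX_SKB_FRAGS */

struct buf {
        size_t len;                     /* payload bytes                        */
        size_t headlen;                 /* payload bytes in the linear head     */
        size_t tailroom;                /* spare room after the linear payload  */
        unsigned int nr_frags;          /* attached page fragments              */
        bool fin;                       /* segment carries a FIN                */
        struct buf *next;               /* singly linked receive queue          */
};

/* Model of tcp_try_coalesce(): returns 1 when 'from' was merged into 'to'
 * (so the caller frees it), 0 when it has to be queued on its own. */
static int try_coalesce(struct buf *to, struct buf *from)
{
        if (from->fin)
                return 0;                               /* never merge a FIN       */
        if (from->len <= to->tailroom) {                /* copy-into-tailroom path */
                to->tailroom -= from->len;
                to->len += from->len;
                return 1;
        }
        if (from->headlen == 0 &&                       /* fragment-stealing path  */
            to->nr_frags + from->nr_frags <= MAX_FRAGS) {
                to->nr_frags += from->nr_frags;
                from->nr_frags = 0;
                to->len += from->len;
                return 1;
        }
        return 0;
}

/* Model of the tcp_data_queue() call site: try to coalesce with the current
 * queue tail, otherwise append the new buffer to the queue. */
static void receive(struct buf **tail, struct buf *skb)
{
        int eaten = *tail ? try_coalesce(*tail, skb) : -1;

        if (eaten <= 0) {
                skb->next = NULL;
                if (*tail)
                        (*tail)->next = skb;
                *tail = skb;                            /* queued as its own buffer  */
        } else {
                free(skb);                              /* payload already merged    */
        }
}

int main(void)
{
        struct buf *tail = NULL;
        struct buf *a = calloc(1, sizeof(*a));
        struct buf *b = calloc(1, sizeof(*b));

        a->len = 1000; a->tailroom = 400;               /* first in-order segment    */
        b->len = 300;  b->headlen = 300;                /* small follow-up segment   */

        receive(&tail, a);                              /* queued                    */
        receive(&tail, b);                              /* merged into 'a' and freed */

        printf("queue tail now holds %zu bytes\n", tail->len);
        free(tail);
        return 0;
}

Running the sketch queues the first 1000-byte buffer, folds the 300-byte follow-up into its tailroom and frees it, leaving a single 1300-byte buffer on the queue, which is the effect the patch aims for on the real receive and out-of-order queues.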