From 6ad3122a08e3a9c2148873665752e87cf4f393cc Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 22 Feb 2016 10:40:07 +0100
Subject: flowcache: Avoid OOM condition under pressure

We can hit an OOM condition if we are under pressure because
we cannot free the entries in gc_list fast enough. So add
a counter for the not yet freed entries in the gc_list and
refuse new allocations if the value is too high.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 730d82a..24cd394 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -80,6 +80,7 @@ struct netns_xfrm {
 	struct flow_cache	flow_cache_global;
 	atomic_t		flow_cache_genid;
 	struct list_head	flow_cache_gc_list;
+	atomic_t		flow_cache_gc_count;
 	spinlock_t		flow_cache_gc_lock;
 	struct work_struct	flow_cache_gc_work;
 	struct work_struct	flow_cache_flush_work;
diff --git a/net/core/flow.c b/net/core/flow.c
index 1033725..3937b1b 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -92,8 +92,11 @@ static void flow_cache_gc_task(struct work_struct *work)
 	list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
 	spin_unlock_bh(&xfrm->flow_cache_gc_lock);
 
-	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
+	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
 		flow_entry_kill(fce, xfrm);
+		atomic_dec(&xfrm->flow_cache_gc_count);
+		WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0);
+	}
 }
 
 static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
@@ -101,6 +104,7 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
 				     struct netns_xfrm *xfrm)
 {
 	if (deleted) {
+		atomic_add(deleted, &xfrm->flow_cache_gc_count);
 		fcp->hash_count -= deleted;
 		spin_lock_bh(&xfrm->flow_cache_gc_lock);
 		list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
@@ -232,6 +236,13 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
 		if (fcp->hash_count > fc->high_watermark)
 			flow_cache_shrink(fc, fcp);
 
+		if (fcp->hash_count > 2 * fc->high_watermark ||
+		    atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) {
+			atomic_inc(&net->xfrm.flow_cache_genid);
+			flo = ERR_PTR(-ENOBUFS);
+			goto ret_object;
+		}
+
 		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
 		if (fle) {
 			fle->net = net;
@@ -446,6 +457,7 @@ int flow_cache_init(struct net *net)
 	INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
 	INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
 	mutex_init(&net->xfrm.flow_flush_sem);
+	atomic_set(&net->xfrm.flow_cache_gc_count, 0);
 
 	fc->hash_shift = 10;
 	fc->low_watermark = 2 * flow_cache_hash_size(fc);
-- 
cgit v0.10.2


From 215276c0147ef49bc07692ca68bae35a30a64b9a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 22 Feb 2016 10:56:45 +0100
Subject: xfrm: Reset encapsulation field of the skb before transformation

The inner headers are invalid after a xfrm transformation.
So reset the skb encapsulation field to ensure nobody tries
to access the inner headers.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ff4a91f..637387b 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -99,6 +99,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 
 		skb_dst_force(skb);
 
+		/* Inner headers are invalid now. */
+		skb->encapsulation = 0;
+
 		err = x->type->output(x, skb);
 		if (err == -EINPROGRESS)
 			goto out;
-- 
cgit v0.10.2


From d6af1a31cc72fbd558c7eddbc36f61bf09d1cf6a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 16 Mar 2016 10:17:37 +0100
Subject: vti: Add pmtu handling to vti_xmit.

We currently rely on the PMTU discovery of xfrm.
However, if a packet is sent locally, the PMTU mechanism
of xfrm tries to do local socket notification, which
might not work for applications like ping that don't
check for this. So add pmtu handling to vti_xmit to
report MTU changes immediately.

Reported-by: Mark McKinstry <Mark.McKinstry@alliedtelesis.co.nz>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5cf10b7..a917903 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -156,6 +156,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *tdev;	/* Device to other host */
 	int err;
+	int mtu;
 
 	if (!dst) {
 		dev->stats.tx_carrier_errors++;
@@ -192,6 +193,23 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 			tunnel->err_count = 0;
 	}
 
+	mtu = dst_mtu(dst);
+	if (skb->len > mtu) {
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+		if (skb->protocol == htons(ETH_P_IP)) {
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		} else {
+			if (mtu < IPV6_MIN_MTU)
+				mtu = IPV6_MIN_MTU;
+
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		}
+
+		dst_release(dst);
+		goto tx_error;
+	}
+
 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
 	skb_dst_set(skb, dst);
 	skb->dev = skb_dst(skb)->dev;
-- 
cgit v0.10.2