From cb2d0f3e968bff7c6d262aca3e3ab8d4184e69b2 Mon Sep 17 00:00:00 2001
From: David Ward <david.ward@ll.mit.edu>
Date: Sun, 18 Sep 2011 12:53:20 +0000
Subject: macvlan/macvtap: Fix unicast between macvtap interfaces in bridge
 mode

Packets should always be forwarded to the lowerdev using dev_forward_skb.
vlan->forward is for packets being forwarded directly to another macvlan/
macvtap device (used for multicast in bridge mode).

Reported-and-tested-by: Shlomo Pongratz <shlomop@mellanox.com>
Signed-off-by: David Ward <david.ward@ll.mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 05172c3..376e3e9 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -239,7 +239,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 		dest = macvlan_hash_lookup(port, eth->h_dest);
 		if (dest && dest->mode == MACVLAN_MODE_BRIDGE) {
 			/* send to lowerdev first for its network taps */
-			vlan->forward(vlan->lowerdev, skb);
+			dev_forward_skb(vlan->lowerdev, skb);
 
 			return NET_XMIT_SUCCESS;
 		}
-- 
cgit v0.10.2


From 260fcbeb1ae9e768a44c9925338fbacb0d7e5ba9 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Thu, 29 Sep 2011 17:10:10 +0000
Subject: tcp: properly handle md5sig_pool references

tcp_v4_clear_md5_list() assumes that multiple tcp md5sig peers
only hold one reference to md5sig_pool. but tcp_v4_md5_do_add()
increases use count of md5sig_pool for each peer. This patch
makes tcp_v4_md5_do_add() only increases use count for the first
tcp md5sig peer.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c34f015..7963e03 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -927,18 +927,21 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 			}
 			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		}
-		if (tcp_alloc_md5sig_pool(sk) == NULL) {
+
+		md5sig = tp->md5sig_info;
+		if (md5sig->entries4 == 0 &&
+		    tcp_alloc_md5sig_pool(sk) == NULL) {
 			kfree(newkey);
 			return -ENOMEM;
 		}
-		md5sig = tp->md5sig_info;
 
 		if (md5sig->alloced4 == md5sig->entries4) {
 			keys = kmalloc((sizeof(*keys) *
 					(md5sig->entries4 + 1)), GFP_ATOMIC);
 			if (!keys) {
 				kfree(newkey);
-				tcp_free_md5sig_pool();
+				if (md5sig->entries4 == 0)
+					tcp_free_md5sig_pool();
 				return -ENOMEM;
 			}
 
@@ -982,6 +985,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 				kfree(tp->md5sig_info->keys4);
 				tp->md5sig_info->keys4 = NULL;
 				tp->md5sig_info->alloced4 = 0;
+				tcp_free_md5sig_pool();
 			} else if (tp->md5sig_info->entries4 != i) {
 				/* Need to do some manipulation */
 				memmove(&tp->md5sig_info->keys4[i],
@@ -989,7 +993,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 					(tp->md5sig_info->entries4 - i) *
 					 sizeof(struct tcp4_md5sig_key));
 			}
-			tcp_free_md5sig_pool();
 			return 0;
 		}
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 79cc646..7b8fc57 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -591,7 +591,8 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
 			}
 			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		}
-		if (tcp_alloc_md5sig_pool(sk) == NULL) {
+		if (tp->md5sig_info->entries6 == 0 &&
+			tcp_alloc_md5sig_pool(sk) == NULL) {
 			kfree(newkey);
 			return -ENOMEM;
 		}
@@ -600,8 +601,9 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
 				       (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
 
 			if (!keys) {
-				tcp_free_md5sig_pool();
 				kfree(newkey);
+				if (tp->md5sig_info->entries6 == 0)
+					tcp_free_md5sig_pool();
 				return -ENOMEM;
 			}
 
@@ -647,6 +649,7 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
 				kfree(tp->md5sig_info->keys6);
 				tp->md5sig_info->keys6 = NULL;
 				tp->md5sig_info->alloced6 = 0;
+				tcp_free_md5sig_pool();
 			} else {
 				/* shrink the database */
 				if (tp->md5sig_info->entries6 != i)
@@ -655,7 +658,6 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
 						(tp->md5sig_info->entries6 - i)
 						* sizeof (tp->md5sig_info->keys6[0]));
 			}
-			tcp_free_md5sig_pool();
 			return 0;
 		}
 	}
-- 
cgit v0.10.2


From 1e5289e121372a3494402b1b131b41bfe1cf9b7f Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Sun, 2 Oct 2011 04:21:50 +0000
Subject: tcp: properly update lost_cnt_hint during shifting

lost_skb_hint is used by tcp_mark_head_lost() to mark the first unhandled skb.
lost_cnt_hint is the number of packets or sacked packets before the lost_skb_hint;
When shifting a skb that is before the lost_skb_hint, if tcp_is_fack() is ture,
the skb has already been counted in the lost_cnt_hint; if tcp_is_fack() is false,
tcp_sacktag_one() will increase the lost_cnt_hint. So tcp_shifted_skb() does not
need to adjust the lost_cnt_hint by itself. When shifting a skb that is equal to
lost_skb_hint, the shifted packets will not be counted by tcp_mark_head_lost().
So tcp_shifted_skb() should adjust the lost_cnt_hint even tcp_is_fack(tp) is true.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 21fab3e..d73aab3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1389,9 +1389,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
 	BUG_ON(!pcount);
 
-	/* Tweak before seqno plays */
-	if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
-	    !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
+	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
 
 	TCP_SKB_CB(prev)->end_seq += shifted;
-- 
cgit v0.10.2


From 3458e21c0d384ca04b27a2ea24d9314c1b57530f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 5 Oct 2011 03:24:43 +0000
Subject: netfilter: Use proper rwlock init function

Replace the open coded initialization with the init function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 2b771dc..5290ac3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3679,7 +3679,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
 	int idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+	rwlock_init(&ipvs->rs_lock);
 
 	/* Initialize rs_table */
 	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
-- 
cgit v0.10.2


From b64b73d7d0c480f75684519c6134e79d50c1b341 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 3 Oct 2011 18:14:45 +0000
Subject: bridge: leave carrier on for empty bridge

This resolves a regression seen by some users of bridging.
Some users use the bridge like a dummy device.
They expect to be able to put an IPv6 address on the device
with no ports attached. Although there are better ways of doing
this, there is no reason to not allow it.

Note: the bridge still will reflect the state of ports in the
bridge if there are any added.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 32b8f9f..ff3ed60 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -91,7 +91,6 @@ static int br_dev_open(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
-	netif_carrier_off(dev);
 	netdev_update_features(dev);
 	netif_start_queue(dev);
 	br_stp_enable_bridge(br);
@@ -108,8 +107,6 @@ static int br_dev_stop(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
-	netif_carrier_off(dev);
-
 	br_stp_disable_bridge(br);
 	br_multicast_stop(br);
 
-- 
cgit v0.10.2


From 186c6bbced722cfeff041d2a1264c95f5d042050 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <benjamin.poirier@gmail.com>
Date: Tue, 4 Oct 2011 04:00:30 +0000
Subject: net: fix typos in Documentation/networking/scaling.txt

The second hunk fixes rps_sock_flow_table but has to re-wrap the paragraph.

Signed-off-by: Benjamin Poirier <benjamin.poirier@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index 8ce7c30..fe67b5c 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -27,7 +27,7 @@ applying a filter to each packet that assigns it to one of a small number
 of logical flows. Packets for each flow are steered to a separate receive
 queue, which in turn can be processed by separate CPUs. This mechanism is
 generally known as “Receive-side Scaling” (RSS). The goal of RSS and
-the other scaling techniques to increase performance uniformly.
+the other scaling techniques is to increase performance uniformly.
 Multi-queue distribution can also be used for traffic prioritization, but
 that is not the focus of these techniques.
 
@@ -186,10 +186,10 @@ are steered using plain RPS. Multiple table entries may point to the
 same CPU. Indeed, with many flows and few CPUs, it is very likely that
 a single application thread handles flows with many different flow hashes.
 
-rps_sock_table is a global flow table that contains the *desired* CPU for
-flows: the CPU that is currently processing the flow in userspace. Each
-table value is a CPU index that is updated during calls to recvmsg and
-sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage()
+rps_sock_flow_table is a global flow table that contains the *desired* CPU
+for flows: the CPU that is currently processing the flow in userspace.
+Each table value is a CPU index that is updated during calls to recvmsg
+and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage()
 and tcp_splice_read()).
 
 When the scheduler moves a thread to a new CPU while it has outstanding
-- 
cgit v0.10.2