Merge branch 'master'

author: Jeff Garzik <jgarzik@pobox.com> 2005-11-11 10:51:24 (GMT)
committer: Jeff Garzik <jgarzik@pobox.com> 2005-11-11 10:51:24 (GMT)
commit: 3b621ee5df437d3f332a635ab6421aaa61a7dc2b (patch)
tree: c4a5236cee8eb7418770802313d36a55f1cc0b1e /net/ipv4
parent: 7211bb9b64f17b23834d91fc3d0c1d78671ee9a8 (diff)
parent: 5e04e7fe774794b837e1d3897e6b96ae2d06679a (diff)
download: linux-fsl-qoriq-3b621ee5df437d3f332a635ab6421aaa61a7dc2b.tar.xz
49 files changed, 1585 insertions, 440 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a9d84f9..eaa150c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -147,8 +147,7 @@ void inet_sock_destruct(struct sock *sk)
 	BUG_TRAP(!sk->sk_wmem_queued);
 	BUG_TRAP(!sk->sk_forward_alloc);
 
-	if (inet->opt)
-		kfree(inet->opt);
+	kfree(inet->opt);
 	dst_release(sk->sk_dst_cache);
 	sk_refcnt_debug_dec(sk);
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 990633c..2267c1f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -266,8 +266,7 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg)
 				if (tb)
 					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
 			}
-			if (rta.rta_mx)
-				kfree(rta.rta_mx);
+			kfree(rta.rta_mx);
 		}
 		rtnl_unlock();
 		return err;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 175e093..e3eceec 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -934,11 +934,11 @@ int icmp_rcv(struct sk_buff *skb)
 	case CHECKSUM_HW:
 		if (!(u16)csum_fold(skb->csum))
 			break;
-		LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n");
+		/* fall through */
 	case CHECKSUM_NONE:
-		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
+		skb->csum = 0;
+		if (__skb_checksum_complete(skb))
 			goto error;
-	default:;
 	}
 
 	if (!pskb_pull(skb, sizeof(struct icmphdr)))
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c6247fc..c04607b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -872,11 +872,18 @@ int igmp_rcv(struct sk_buff *skb)
 		return 0;
 	}
 
-	if (!pskb_may_pull(skb, sizeof(struct igmphdr)) || 
-	    (u16)csum_fold(skb_checksum(skb, 0, len, 0))) {
-		in_dev_put(in_dev);
-		kfree_skb(skb);
-		return 0;
+	if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
+		goto drop;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_HW:
+		if (!(u16)csum_fold(skb->csum))
+			break;
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = 0;
+		if (__skb_checksum_complete(skb))
+			goto drop;
 	}
 
 	ih = skb->h.igmph;
@@ -906,6 +913,8 @@ int igmp_rcv(struct sk_buff *skb)
 	default:
 		NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type);
 	}
+
+drop:
 	in_dev_put(in_dev);
 	kfree_skb(skb);
 	return 0;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 94468a7..3fe021f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
 		int low = sysctl_local_port_range[0];
 		int high = sysctl_local_port_range[1];
 		int remaining = (high - low) + 1;
-		int rover;
+		int rover = net_random() % (high - low) + low;
 
-		spin_lock(&hashinfo->portalloc_lock);
-		if (hashinfo->port_rover < low)
-			rover = low;
-		else
-			rover = hashinfo->port_rover;
 		do {
-			rover++;
-			if (rover > high)
-				rover = low;
 			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
 			spin_lock(&head->lock);
 			inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
 			break;
 		next:
 			spin_unlock(&head->lock);
+			if (++rover > high)
+				rover = low;
 		} while (--remaining > 0);
-		hashinfo->port_rover = rover;
-		spin_unlock(&hashinfo->portalloc_lock);
 
 		/* Exhausted local port range during search?  It is not
 		 * possible for us to be holding one of the bind hash
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 71f3c73..39061ed 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -724,12 +724,6 @@ done:
 	return skb->len;
 }
 
-static int inet_diag_dump_done(struct netlink_callback *cb)
-{
-	return 0;
-}
-
-
 static __inline__ int
 inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
@@ -760,8 +754,7 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				goto err_inval;
 		}
 		return netlink_dump_start(idiagnl, skb, nlh,
-					  inet_diag_dump,
-					  inet_diag_dump_done);
+					  inet_diag_dump, NULL);
 	} else {
 		return inet_diag_get_exact(skb, nlh);
 	}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 896ce3f..4e9c74b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -577,15 +577,16 @@ static int ipgre_rcv(struct sk_buff *skb)
 			goto drop_nolock;
 
 		if (flags&GRE_CSUM) {
-			if (skb->ip_summed == CHECKSUM_HW) {
+			switch (skb->ip_summed) {
+			case CHECKSUM_HW:
 				csum = (u16)csum_fold(skb->csum);
-				if (csum)
-					skb->ip_summed = CHECKSUM_NONE;
-			}
-			if (skb->ip_summed == CHECKSUM_NONE) {
-				skb->csum = skb_checksum(skb, 0, skb->len, 0);
+				if (!csum)
+					break;
+				/* fall through */
+			case CHECKSUM_NONE:
+				skb->csum = 0;
+				csum = __skb_checksum_complete(skb);
 				skb->ip_summed = CHECKSUM_HW;
-				csum = (u16)csum_fold(skb->csum);
 			}
 			offset += 4;
 		}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index bce4e87..dbe12da 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -510,8 +510,7 @@ static int ip_options_get_finish(struct ip_options **optp,
 		kfree(opt);
 		return -EINVAL;
 	}
-	if (*optp)
-		kfree(*optp);
+	kfree(*optp);
 	*optp = opt;
 	return 0;
 }
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1775823..11c2f68 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -353,7 +353,8 @@ packet_routed:
 		ip_options_build(skb, opt, inet->daddr, rt, 0);
 	}
 
-	ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
+	ip_select_ident_more(iph, &rt->u.dst, sk,
+			     (skb_shinfo(skb)->tso_segs ?: 1) - 1);
 
 	/* Add an IP checksum. */
 	ip_send_check(iph);
@@ -1262,10 +1263,8 @@ int ip_push_pending_frames(struct sock *sk)
 
 out:
 	inet->cork.flags &= ~IPCORK_OPT;
-	if (inet->cork.opt) {
-		kfree(inet->cork.opt);
-		inet->cork.opt = NULL;
-	}
+	kfree(inet->cork.opt);
+	inet->cork.opt = NULL;
 	if (inet->cork.rt) {
 		ip_rt_put(inet->cork.rt);
 		inet->cork.rt = NULL;
@@ -1289,10 +1288,8 @@ void ip_flush_pending_frames(struct sock *sk)
 		kfree_skb(skb);
 
 	inet->cork.flags &= ~IPCORK_OPT;
-	if (inet->cork.opt) {
-		kfree(inet->cork.opt);
-		inet->cork.opt = NULL;
-	}
+	kfree(inet->cork.opt);
+	inet->cork.opt = NULL;
 	if (inet->cork.rt) {
 		ip_rt_put(inet->cork.rt);
 		inet->cork.rt = NULL;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 2f0b47d..4f2d872 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -202,8 +202,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s
 		if (ra->sk == sk) {
 			if (on) {
 				write_unlock_bh(&ip_ra_lock);
-				if (new_ra)
-					kfree(new_ra);
+				kfree(new_ra);
 				return -EADDRINUSE;
 			}
 			*rap = ra->next;
@@ -446,8 +445,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 #endif
 			}
 			opt = xchg(&inet->opt, opt);
-			if (opt)
-				kfree(opt);
+			kfree(opt);
 			break;
 		}
 		case IP_PKTINFO:
@@ -828,10 +826,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 
 			err = ip_mc_msfilter(sk, msf, ifindex);
 mc_msf_out:
-			if (msf)
-				kfree(msf);
-			if (gsf)
-				kfree(gsf);
+			kfree(msf);
+			kfree(gsf);
 			break;
 		}
 		case IP_ROUTER_ALERT:	
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index fc6f95a..d7eb680 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -110,8 +110,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
 	return 0;
 
   out:
-	if (inc->timeout_table)
-		kfree(inc->timeout_table);
+	kfree(inc->timeout_table);
 	kfree(inc);
 	return ret;
 }
@@ -136,8 +135,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
 
 	list_del(&inc->a_list);
 
-	if (inc->timeout_table != NULL)
-		kfree(inc->timeout_table);
+	kfree(inc->timeout_table);
 	kfree(inc);
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 981cc32..1a0843c 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1009,11 +1009,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
 		if (sysctl_ip_vs_expire_nodest_conn) {
 			/* try to expire the connection immediately */
 			ip_vs_conn_expire_now(cp);
-		} else {
-			/* don't restart its timer, and silently
-			   drop the packet. */
-			__ip_vs_conn_put(cp);
 		}
+		/* don't restart its timer, and silently
+		   drop the packet. */
+		__ip_vs_conn_put(cp);
 		return NF_DROP;
 	}
 
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
index bd7d75b..d34a9fa 100644
--- a/net/ipv4/multipath_wrandom.c
+++ b/net/ipv4/multipath_wrandom.c
@@ -207,16 +207,12 @@ static void wrandom_select_route(const struct flowi *flp,
 			decision = mpc->rt;
 
 		last_power = mpc->power;
-		if (last_mpc)
-			kfree(last_mpc);
-
+		kfree(last_mpc);
 		last_mpc = mpc;
 	}
 
-	if (last_mpc) {
-		/* concurrent __multipath_flush may lead to !last_mpc */
-		kfree(last_mpc);
-	}
+	/* concurrent __multipath_flush may lead to !last_mpc */
+	kfree(last_mpc);
 
 	decision->u.dst.__use++;
 	*rp = decision;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 7d917e4..9d3c8b5 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -5,6 +5,20 @@
 menu "IP: Netfilter Configuration"
 	depends on INET && NETFILTER
 
+config NF_CONNTRACK_IPV4
+	tristate "IPv4 support for new connection tracking (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && NF_CONNTRACK
+	---help---
+	  Connection tracking keeps a record of what packets have passed
+	  through your machine, in order to figure out how they are related
+	  into connections.
+
+	  This is IPv4 support on Layer 3 independent connection tracking.
+	  Layer 3 independent connection tracking is experimental scheme
+	  which generalize ip_conntrack to support other layer 3 protocols.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 # connection tracking, helpers and protocols
 config IP_NF_CONNTRACK
 	tristate "Connection tracking (required for masq/NAT)"
@@ -209,8 +223,8 @@ config IP_NF_MATCH_PKTTYPE
 	tristate "Packet type match support"
 	depends on IP_NF_IPTABLES
 	help
-         Packet type matching allows you to match a packet by
-         its "class", eg. BROADCAST, MULTICAST, ...
+	  Packet type matching allows you to match a packet by
+	  its "class", eg. BROADCAST, MULTICAST, ...
 
 	  Typical usage:
 	  iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
@@ -317,7 +331,8 @@ config IP_NF_MATCH_TCPMSS
 
 config IP_NF_MATCH_HELPER
 	tristate "Helper match support"
-	depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+	depends on IP_NF_IPTABLES
+	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
 	help
 	  Helper matching allows you to match packets in dynamic connections
 	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -326,7 +341,8 @@ config IP_NF_MATCH_HELPER
 
 config IP_NF_MATCH_STATE
 	tristate "Connection state match support"
-	depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+	depends on IP_NF_IPTABLES
+	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
 	help
 	  Connection state matching allows you to match packets based on their
 	  relationship to a tracked connection (ie. previous packets).  This
@@ -336,7 +352,8 @@ config IP_NF_MATCH_STATE
 
 config IP_NF_MATCH_CONNTRACK
 	tristate "Connection tracking match support"
-	depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+	depends on IP_NF_IPTABLES
+	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
 	help
 	  This is a general conntrack match module, a superset of the state match.
 
@@ -422,7 +439,8 @@ config IP_NF_MATCH_COMMENT
 
 config IP_NF_MATCH_CONNMARK
 	tristate  'Connection mark match support'
-	depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES
+	depends on IP_NF_IPTABLES
+	depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
 	help
 	  This option adds a `connmark' match, which allows you to match the
 	  connection mark value previously set for the session by `CONNMARK'. 
@@ -433,7 +451,8 @@ config IP_NF_MATCH_CONNMARK
 
 config IP_NF_MATCH_CONNBYTES
 	tristate  'Connection byte/packet counter match support'
-	depends on IP_NF_CT_ACCT && IP_NF_IPTABLES
+	depends on IP_NF_IPTABLES
+	depends on IP_NF_CT_ACCT || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
 	help
 	  This option adds a `connbytes' match, which allows you to match the
 	  number of bytes and/or packets for each direction within a connection.
@@ -747,7 +766,8 @@ config IP_NF_TARGET_TTL
 
 config IP_NF_TARGET_CONNMARK
 	tristate  'CONNMARK target support'
-	depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE
+	depends on IP_NF_MANGLE
+	depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
 	help
 	  This option adds a `CONNMARK' target, which allows one to manipulate
 	  the connection mark value.  Similar to the MARK target, but
@@ -759,7 +779,8 @@ config IP_NF_TARGET_CONNMARK
 
 config IP_NF_TARGET_CLUSTERIP
 	tristate "CLUSTERIP target support (EXPERIMENTAL)"
-	depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES && EXPERIMENTAL
+	depends on IP_NF_IPTABLES && EXPERIMENTAL
+	depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
 	help
 	  The CLUSTERIP target allows you to build load-balancing clusters of
 	  network servers without having a dedicated load-balancing
@@ -782,7 +803,7 @@ config IP_NF_RAW
 config IP_NF_TARGET_NOTRACK
 	tristate  'NOTRACK target support'
 	depends on IP_NF_RAW
-	depends on IP_NF_CONNTRACK
+	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
 	help
 	  The NOTRACK target allows a select rule to specify
 	  which packets *not* to enter the conntrack/NAT
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index dab4b58..058c48e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -103,3 +103,9 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
 
 obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
+
+# objects for l3 independent conntrack
+nf_conntrack_ipv4-objs  :=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+
+# l3 independent conntrack
+obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 926a668..4108a5e 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -270,14 +270,10 @@ exp_gre(struct ip_conntrack *master,
 	exp_orig->expectfn = pptp_expectfn;
 	exp_orig->flags = 0;
 
-	exp_orig->dir = IP_CT_DIR_ORIGINAL;
-
 	/* both expectations are identical apart from tuple */
 	memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
 	memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
 
-	exp_reply->dir = !exp_orig->dir;
-
 	if (ip_nat_pptp_hook_exp_gre)
 		ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
 	else {
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 166e606..d2a4fec 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -28,11 +28,8 @@
 #include <linux/netlink.h>
 #include <linux/spinlock.h>
 #include <linux/notifier.h>
-#include <linux/rtnetlink.h>
 
 #include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
@@ -58,14 +55,17 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb,
 			    const struct ip_conntrack_tuple *tuple)
 {
 	struct ip_conntrack_protocol *proto;
+	int ret = 0;
 
 	NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
 
 	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-	if (proto && proto->tuple_to_nfattr)
-		return proto->tuple_to_nfattr(skb, tuple);
+	if (likely(proto && proto->tuple_to_nfattr)) {
+		ret = proto->tuple_to_nfattr(skb, tuple);
+		ip_conntrack_proto_put(proto);
+	}
 
-	return 0;
+	return ret;
 
 nfattr_failure:
 	return -1;
@@ -175,7 +175,7 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
 {
 	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
 	struct nfattr *nest_count = NFA_NEST(skb, type);
-	u_int64_t tmp;
+	u_int32_t tmp;
 
 	tmp = htonl(ct->counters[dir].packets);
 	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
@@ -479,9 +479,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
-	
-	if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0)
-		goto nfattr_failure;
+	nfattr_parse_nested(tb, CTA_IP_MAX, attr);
 
 	if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
 		return -EINVAL;
@@ -497,9 +495,6 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
 	DEBUGP("leaving\n");
 
 	return 0;
-
-nfattr_failure:
-	return -1;
 }
 
 static const int cta_min_proto[CTA_PROTO_MAX] = {
@@ -521,8 +516,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
-	if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0)
-		goto nfattr_failure;
+	nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
 
 	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
 		return -EINVAL;
@@ -539,9 +533,6 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
 	}
 	
 	return ret;
-
-nfattr_failure:
-	return -1;
 }
 
 static inline int
@@ -555,8 +546,7 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
 
 	memset(tuple, 0, sizeof(*tuple));
 
-	if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0)
-		goto nfattr_failure;
+	nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
 
 	if (!tb[CTA_TUPLE_IP-1])
 		return -EINVAL;
@@ -583,9 +573,6 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
 	DEBUGP("leaving\n");
 
 	return 0;
-
-nfattr_failure:
-	return -1;
 }
 
 #ifdef CONFIG_IP_NF_NAT_NEEDED
@@ -603,11 +590,10 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr,
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
-	if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0)
-		goto nfattr_failure;
+	nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
 
 	if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
-		goto nfattr_failure;
+		return -EINVAL;
 
 	npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
 	if (!npt)
@@ -626,9 +612,6 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr,
 
 	DEBUGP("leaving\n");
 	return 0;
-
-nfattr_failure:
-	return -1;
 }
 
 static inline int
@@ -642,8 +625,7 @@ ctnetlink_parse_nat(struct nfattr *cda[],
 
 	memset(range, 0, sizeof(*range));
 	
-	if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0)
-		goto nfattr_failure;
+	nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]);
 
 	if (tb[CTA_NAT_MINIP-1])
 		range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
@@ -665,9 +647,6 @@ ctnetlink_parse_nat(struct nfattr *cda[],
 
 	DEBUGP("leaving\n");
 	return 0;
-
-nfattr_failure:
-	return -1;
 }
 #endif
 
@@ -678,8 +657,7 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
-	if (nfattr_parse_nested(tb, CTA_HELP_MAX, attr) < 0)
-		goto nfattr_failure;
+	nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
 
 	if (!tb[CTA_HELP_NAME-1])
 		return -EINVAL;
@@ -687,9 +665,6 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
 	*helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
 
 	return 0;
-
-nfattr_failure:
-	return -1;
 }
 
 static int
@@ -804,7 +779,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	ct = tuplehash_to_ctrack(h);
 
 	err = -ENOMEM;
-	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2) {
 		ip_conntrack_put(ct);
 		return -ENOMEM;
@@ -815,7 +790,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 				  IPCTNL_MSG_CT_NEW, 1, ct);
 	ip_conntrack_put(ct);
 	if (err <= 0)
-		goto out;
+		goto free;
 
 	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
 	if (err < 0)
@@ -824,10 +799,10 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	DEBUGP("leaving\n");
 	return 0;
 
+free:
+	kfree_skb(skb2);
 out:
-	if (skb2)
-		kfree_skb(skb2);
-	return -1;
+	return err;
 }
 
 static inline int
@@ -957,8 +932,7 @@ ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
 	u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
 	int err = 0;
 
-	if (nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr) < 0)
-		goto nfattr_failure;
+	nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
 
 	proto = ip_conntrack_proto_find_get(npt);
 	if (!proto)
@@ -969,9 +943,6 @@ ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
 	ip_conntrack_proto_put(proto); 
 
 	return err;
-
-nfattr_failure:
-	return -ENOMEM;
 }
 
 static int
@@ -1005,6 +976,11 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
 			return err;
 	}
 
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if (cda[CTA_MARK-1])
+		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
 	DEBUGP("all done\n");
 	return 0;
 }
@@ -1048,6 +1024,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 	if (ct->helper)
 		ip_conntrack_helper_put(ct->helper);
 
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if (cda[CTA_MARK-1])
+		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
 	DEBUGP("conntrack with id %u inserted\n", ct->id);
 	return 0;
 
@@ -1312,6 +1293,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (!exp)
 		return -ENOENT;
 
+	if (cda[CTA_EXPECT_ID-1]) {
+		u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+		if (exp->id != ntohl(id)) {
+			ip_conntrack_expect_put(exp);
+			return -ENOENT;
+		}
+	}	
+
 	err = -ENOMEM;
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -1322,21 +1311,16 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
 				      1, exp);
 	if (err <= 0)
-		goto out;
+		goto free;
 
 	ip_conntrack_expect_put(exp);
 
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-	if (err < 0)
-		goto free;
-
-	return err;
+	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
 
+free:
+	kfree_skb(skb2);
 out:
 	ip_conntrack_expect_put(exp);
-free:
-	if (skb2)
-		kfree_skb(skb2);
 	return err;
 }
 
@@ -1392,7 +1376,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 				ip_conntrack_expect_put(exp);
 			}
 		}
-		write_unlock(&ip_conntrack_lock);
+		write_unlock_bh(&ip_conntrack_lock);
 	} else {
 		/* This basically means we have to flush everything*/
 		write_lock_bh(&ip_conntrack_lock);
@@ -1559,6 +1543,8 @@ static struct nfnetlink_subsystem ctnl_exp_subsys = {
 	.cb				= ctnl_exp_cb,
 };
 
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
+
 static int __init ctnetlink_init(void)
 {
 	int ret;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 98f0015..e4d6b26 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -13,6 +13,7 @@
 #include <linux/in.h>
 #include <linux/icmp.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <linux/netfilter.h>
@@ -151,13 +152,13 @@ icmp_error_message(struct sk_buff *skb,
 	/* Not enough header? */
 	inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
 	if (inside == NULL)
-		return NF_ACCEPT;
+		return -NF_ACCEPT;
 
 	/* Ignore ICMP's containing fragments (shouldn't happen) */
 	if (inside->ip.frag_off & htons(IP_OFFSET)) {
 		DEBUGP("icmp_error_track: fragment of proto %u\n",
 		       inside->ip.protocol);
-		return NF_ACCEPT;
+		return -NF_ACCEPT;
 	}
 
 	innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
@@ -166,7 +167,7 @@ icmp_error_message(struct sk_buff *skb,
 	if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
 		DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
 		ip_conntrack_proto_put(innerproto);
-		return NF_ACCEPT;
+		return -NF_ACCEPT;
 	}
 
 	/* Ordinarily, we'd expect the inverted tupleproto, but it's
@@ -174,7 +175,7 @@ icmp_error_message(struct sk_buff *skb,
 	if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
 		DEBUGP("icmp_error_track: Can't invert tuple\n");
 		ip_conntrack_proto_put(innerproto);
-		return NF_ACCEPT;
+		return -NF_ACCEPT;
 	}
 	ip_conntrack_proto_put(innerproto);
 
@@ -190,7 +191,7 @@ icmp_error_message(struct sk_buff *skb,
 
 		if (!h) {
 			DEBUGP("icmp_error_track: no match\n");
-			return NF_ACCEPT;
+			return -NF_ACCEPT;
 		}
 		/* Reverse direction from that found */
 		if (DIRECTION(h) != IP_CT_DIR_REPLY)
@@ -230,19 +231,15 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 	case CHECKSUM_HW:
 		if (!(u16)csum_fold(skb->csum)) 
 			break;
-		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "ip_ct_icmp: bad HW ICMP checksum ");
-		return -NF_ACCEPT;
+		/* fall through */
 	case CHECKSUM_NONE:
-		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+		skb->csum = 0;
+		if (__skb_checksum_complete(skb)) {
 			if (LOG_INVALID(IPPROTO_ICMP))
 				nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 					      "ip_ct_icmp: bad ICMP checksum ");
 			return -NF_ACCEPT;
 		}
-	default:
-		break;
 	}
 
 checksum_skipped:
@@ -296,7 +293,8 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
 				struct ip_conntrack_tuple *tuple)
 {
 	if (!tb[CTA_PROTO_ICMP_TYPE-1]
-	    || !tb[CTA_PROTO_ICMP_CODE-1])
+	    || !tb[CTA_PROTO_ICMP_CODE-1]
+	    || !tb[CTA_PROTO_ICMP_ID-1])
 		return -1;
 
 	tuple->dst.u.icmp.type = 
@@ -304,7 +302,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
 	tuple->dst.u.icmp.code =
 			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
 	tuple->src.u.icmp.id =
-			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+			*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
 
 	return 0;
 }
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index d6701ca..468c600 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -362,8 +362,12 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
 	struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
 	struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
 
-        if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0)
-                goto nfattr_failure;
+	/* updates could not contain anything about the private
+	 * protocol info, in that case skip the parsing */
+	if (!attr)
+		return 0;
+
+        nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
 
 	if (!tb[CTA_PROTOINFO_TCP_STATE-1])
 		return -EINVAL;
@@ -374,9 +378,6 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
 	write_unlock_bh(&tcp_lock);
 
 	return 0;
-
-nfattr_failure:
-	return -1;
 }
 #endif
 
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index c5e3abd..762f4d9 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -66,10 +66,8 @@ ip_nat_proto_find_get(u_int8_t protonum)
 	 * removed until we've grabbed the reference */
 	preempt_disable();
 	p = __ip_nat_proto_find(protonum);
-	if (p) {
-		if (!try_module_get(p->me))
-			p = &ip_nat_unknown_protocol;
-	}
+	if (!try_module_get(p->me))
+		p = &ip_nat_unknown_protocol;
 	preempt_enable();
 
 	return p;
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 3cdd068..e546203 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -73,6 +73,7 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
 	struct ip_conntrack_tuple t;
 	struct ip_ct_pptp_master *ct_pptp_info;
 	struct ip_nat_pptp *nat_pptp_info;
+	struct ip_nat_range range;
 
 	ct_pptp_info = &master->help.ct_pptp_info;
 	nat_pptp_info = &master->nat.help.nat_pptp_info;
@@ -110,7 +111,30 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
 		DEBUGP("not found!\n");
 	}
 
-	ip_nat_follow_master(ct, exp);
+	/* This must be a fresh one. */
+	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+	/* Change src to where master sends to */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.min_ip = range.max_ip
+		= ct->master->tuplehash[!exp->dir].tuple.dst.ip;
+	if (exp->dir == IP_CT_DIR_ORIGINAL) {
+		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.min = range.max = exp->saved_proto;
+	}
+	/* hook doesn't matter, but it has to do source manip */
+	ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+	/* For DST manip, map port here to where it's expected. */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.min_ip = range.max_ip
+		= ct->master->tuplehash[!exp->dir].tuple.src.ip;
+	if (exp->dir == IP_CT_DIR_REPLY) {
+		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.min = range.max = exp->saved_proto;
+	}
+	/* hook doesn't matter, but it has to do destination manip */
+	ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
 }
 
 /* outbound packets == from PNS to PAC */
@@ -213,9 +237,10 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 
 	/* alter expectation for PNS->PAC direction */
 	invert_tuplepr(&inv_t, &expect_orig->tuple);
-	expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
+	expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id);
 	expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
 	expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+	expect_orig->dir = IP_CT_DIR_ORIGINAL;
 	inv_t.src.ip = reply_t->src.ip;
 	inv_t.dst.ip = reply_t->dst.ip;
 	inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
@@ -233,6 +258,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 	expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
 	expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
 	expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+	expect_reply->dir = IP_CT_DIR_REPLY;
 	inv_t.src.ip = orig_t->src.ip;
 	inv_t.dst.ip = orig_t->dst.ip;
 	inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 7c12854..f7cad7c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -139,8 +139,8 @@ gre_manip_pkt(struct sk_buff **pskb,
 			break;
 		case GRE_VERSION_PPTP:
 			DEBUGP("call_id -> 0x%04x\n", 
-				ntohl(tuple->dst.u.gre.key));
-			pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
+				ntohs(tuple->dst.u.gre.key));
+			pgreh->call_id = tuple->dst.u.gre.key;
 			break;
 		default:
 			DEBUGP("can't nat unknown GRE version\n");
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 99bbef5..f0099a6 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -62,7 +62,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range)
 
 struct ip_nat_protocol ip_nat_unknown_protocol = {
 	.name			= "unknown",
-	.me			= THIS_MODULE,
+	/* .me isn't set: getting a ref to this cannot fail. */
 	.manip_pkt		= unknown_manip_pkt,
 	.in_range		= unknown_in_range,
 	.unique_tuple		= unknown_unique_tuple,
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 93b2c51..8acb7ed 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1161,8 +1161,7 @@ static int snmp_parse_mangle(unsigned char *msg,
 		
 		if (!snmp_object_decode(&ctx, obj)) {
 			if (*obj) {
-				if ((*obj)->id)
-					kfree((*obj)->id);
+				kfree((*obj)->id);
 				kfree(*obj);
 			}	
 			kfree(obj);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 9bcb398..45c52d8 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,7 +29,7 @@
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 
 #define CLUSTERIP_VERSION "0.8"
 
@@ -316,14 +316,14 @@ target(struct sk_buff **pskb,
 {
 	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 	enum ip_conntrack_info ctinfo;
-	struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
-	u_int32_t hash;
+	u_int32_t *mark, hash;
 
 	/* don't need to clusterip_config_get() here, since refcount
 	 * is only decremented by destroy() - and ip_tables guarantees
 	 * that the ->target() function isn't called after ->destroy() */
 
-	if (!ct) {
+	mark = nf_ct_get_mark((*pskb), &ctinfo);
+	if (mark == NULL) {
 		printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
 			/* FIXME: need to drop invalid ones, since replies
 			 * to outgoing connections of other nodes will be 
@@ -346,7 +346,7 @@ target(struct sk_buff **pskb,
 
 	switch (ctinfo) {
 		case IP_CT_NEW:
-			ct->mark = hash;
+			*mark = hash;
 			break;
 		case IP_CT_RELATED:
 		case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -363,7 +363,7 @@ target(struct sk_buff **pskb,
 #ifdef DEBUG_CLUSTERP
 	DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 #endif
-	DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
+	DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
 	if (!clusterip_responsible(cipinfo->config, hash)) {
 		DEBUGP("not responsible\n");
 		return NF_DROP;
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 1346380..8acac5a 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -29,7 +29,7 @@ MODULE_LICENSE("GPL");
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_CONNMARK.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 
 static unsigned int
 target(struct sk_buff **pskb,
@@ -43,24 +43,24 @@ target(struct sk_buff **pskb,
 	u_int32_t diff;
 	u_int32_t nfmark;
 	u_int32_t newmark;
+	u_int32_t ctinfo;
+	u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
 
-	enum ip_conntrack_info ctinfo;
-	struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
-	if (ct) {
+	if (ctmark) {
 	    switch(markinfo->mode) {
 	    case IPT_CONNMARK_SET:
-		newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
-		if (newmark != ct->mark)
-		    ct->mark = newmark;
+		newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
+		if (newmark != *ctmark)
+		    *ctmark = newmark;
 		break;
 	    case IPT_CONNMARK_SAVE:
-		newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
-		if (ct->mark != newmark)
-		    ct->mark = newmark;
+		newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
+		if (*ctmark != newmark)
+		    *ctmark = newmark;
 		break;
 	    case IPT_CONNMARK_RESTORE:
 		nfmark = (*pskb)->nfmark;
-		diff = (ct->mark ^ nfmark) & markinfo->mask;
+		diff = (*ctmark ^ nfmark) & markinfo->mask;
 		if (diff != 0)
 		    (*pskb)->nfmark = nfmark ^ diff;
 		break;
@@ -109,6 +109,7 @@ static struct ipt_target ipt_connmark_reg = {
 
 static int __init init(void)
 {
+	need_ip_conntrack();
 	return ipt_register_target(&ipt_connmark_reg);
 }
 
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c
index a4bb9b3..e3c69d0 100644
--- a/net/ipv4/netfilter/ipt_NOTRACK.c
+++ b/net/ipv4/netfilter/ipt_NOTRACK.c
@@ -5,7 +5,7 @@
 #include <linux/skbuff.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 
 static unsigned int
 target(struct sk_buff **pskb,
@@ -23,7 +23,7 @@ target(struct sk_buff **pskb,
 	   If there is a real ct entry correspondig to this packet, 
 	   it'll hang aroun till timing out. We don't deal with it
 	   for performance reasons. JK */
-	(*pskb)->nfct = &ip_conntrack_untracked.ct_general;
+	nf_ct_untrack(*pskb);
 	(*pskb)->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get((*pskb)->nfct);
 
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c
index df4a42c..d68a048 100644
--- a/net/ipv4/netfilter/ipt_connbytes.c
+++ b/net/ipv4/netfilter/ipt_connbytes.c
@@ -10,7 +10,7 @@
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_connbytes.h>
 
@@ -46,60 +46,59 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
 	const struct ipt_connbytes_info *sinfo = matchinfo;
-	enum ip_conntrack_info ctinfo;
-	struct ip_conntrack *ct;
 	u_int64_t what = 0;	/* initialize to make gcc happy */
+	const struct ip_conntrack_counter *counters;
 
-	if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)))
+	if (!(counters = nf_ct_get_counters(skb)))
 		return 0; /* no match */
 
 	switch (sinfo->what) {
 	case IPT_CONNBYTES_PKTS:
 		switch (sinfo->direction) {
 		case IPT_CONNBYTES_DIR_ORIGINAL:
-			what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
+			what = counters[IP_CT_DIR_ORIGINAL].packets;
 			break;
 		case IPT_CONNBYTES_DIR_REPLY:
-			what = ct->counters[IP_CT_DIR_REPLY].packets;
+			what = counters[IP_CT_DIR_REPLY].packets;
 			break;
 		case IPT_CONNBYTES_DIR_BOTH:
-			what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
-			what += ct->counters[IP_CT_DIR_REPLY].packets;
+			what = counters[IP_CT_DIR_ORIGINAL].packets;
+			what += counters[IP_CT_DIR_REPLY].packets;
 			break;
 		}
 		break;
 	case IPT_CONNBYTES_BYTES:
 		switch (sinfo->direction) {
 		case IPT_CONNBYTES_DIR_ORIGINAL:
-			what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
+			what = counters[IP_CT_DIR_ORIGINAL].bytes;
 			break;
 		case IPT_CONNBYTES_DIR_REPLY:
-			what = ct->counters[IP_CT_DIR_REPLY].bytes;
+			what = counters[IP_CT_DIR_REPLY].bytes;
 			break;
 		case IPT_CONNBYTES_DIR_BOTH:
-			what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
-			what += ct->counters[IP_CT_DIR_REPLY].bytes;
+			what = counters[IP_CT_DIR_ORIGINAL].bytes;
+			what += counters[IP_CT_DIR_REPLY].bytes;
 			break;
 		}
 		break;
 	case IPT_CONNBYTES_AVGPKT:
 		switch (sinfo->direction) {
 		case IPT_CONNBYTES_DIR_ORIGINAL:
-			what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes,
-					ct->counters[IP_CT_DIR_ORIGINAL].packets);
+			what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes,
+					counters[IP_CT_DIR_ORIGINAL].packets);
 			break;
 		case IPT_CONNBYTES_DIR_REPLY:
-			what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes,
-					ct->counters[IP_CT_DIR_REPLY].packets);
+			what = div64_64(counters[IP_CT_DIR_REPLY].bytes,
+					counters[IP_CT_DIR_REPLY].packets);
 			break;
 		case IPT_CONNBYTES_DIR_BOTH:
 			{
 				u_int64_t bytes;
 				u_int64_t pkts;
-				bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes +
-					ct->counters[IP_CT_DIR_REPLY].bytes;
-				pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+
-					ct->counters[IP_CT_DIR_REPLY].packets;
+				bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
+					counters[IP_CT_DIR_REPLY].bytes;
+				pkts = counters[IP_CT_DIR_ORIGINAL].packets+
+					counters[IP_CT_DIR_REPLY].packets;
 
 				/* FIXME_THEORETICAL: what to do if sum
 				 * overflows ? */
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
index bf8de47..5306ef2 100644
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ b/net/ipv4/netfilter/ipt_connmark.c
@@ -28,7 +28,7 @@ MODULE_LICENSE("GPL");
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_connmark.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 
 static int
 match(const struct sk_buff *skb,
@@ -39,12 +39,12 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
 	const struct ipt_connmark_info *info = matchinfo;
-	enum ip_conntrack_info ctinfo;
-	struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-	if (!ct)
+	u_int32_t ctinfo;
+	const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
+	if (!ctmark)
 		return 0;
 
-	return ((ct->mark & info->mask) == info->mark) ^ info->invert;
+	return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
 }
 
 static int
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c
index c1d2280..c8d1870 100644
--- a/net/ipv4/netfilter/ipt_conntrack.c
+++ b/net/ipv4/netfilter/ipt_conntrack.c
@@ -10,7 +10,14 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
 #include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
+#else
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_conntrack.h>
 
@@ -18,6 +25,8 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables connection tracking match module");
 
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -102,6 +111,93 @@ match(const struct sk_buff *skb,
 	return 1;
 }
 
+#else /* CONFIG_IP_NF_CONNTRACK */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_conntrack_info *sinfo = matchinfo;
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned int statebit;
+
+	ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
+
+#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
+
+	if (ct == &nf_conntrack_untracked)
+		statebit = IPT_CONNTRACK_STATE_UNTRACKED;
+	else if (ct)
+ 		statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
+ 	else
+ 		statebit = IPT_CONNTRACK_STATE_INVALID;
+ 
+	if(sinfo->flags & IPT_CONNTRACK_STATE) {
+		if (ct) {
+			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
+			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
+				statebit |= IPT_CONNTRACK_STATE_SNAT;
+
+			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
+			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
+				statebit |= IPT_CONNTRACK_STATE_DNAT;
+		}
+
+		if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_PROTO) {
+		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
+                	return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_STATUS) {
+		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
+		unsigned long expires;
+
+		if(!ct)
+			return 0;
+
+		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
+
+		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
+			return 0;
+	}
+
+	return 1;
+}
+
+#endif /* CONFIG_NF_IP_CONNTRACK */
+
 static int check(const char *tablename,
 		 const struct ipt_ip *ip,
 		 void *matchinfo,
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
index 3e7dd01..bf14e1c 100644
--- a/net/ipv4/netfilter/ipt_helper.c
+++ b/net/ipv4/netfilter/ipt_helper.c
@@ -13,9 +13,15 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#else
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#endif
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_helper.h>
 
@@ -29,6 +35,7 @@ MODULE_DESCRIPTION("iptables helper match module");
 #define DEBUGP(format, args...)
 #endif
 
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -73,6 +80,53 @@ out_unlock:
 	return ret;
 }
 
+#else /* CONFIG_IP_NF_CONNTRACK */
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_helper_info *info = matchinfo;
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	int ret = info->invert;
+	
+	ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
+	if (!ct) {
+		DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
+		return ret;
+	}
+
+	if (!ct->master) {
+		DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
+		return ret;
+	}
+
+	read_lock_bh(&nf_conntrack_lock);
+	if (!ct->master->helper) {
+		DEBUGP("ipt_helper: master ct %p has no helper\n", 
+			exp->expectant);
+		goto out_unlock;
+	}
+
+	DEBUGP("master's name = %s , info->name = %s\n", 
+		ct->master->helper->name, info->name);
+
+	if (info->name[0] == '\0')
+		ret ^= 1;
+	else
+		ret ^= !strncmp(ct->master->helper->name, info->name, 
+		                strlen(ct->master->helper->name));
+out_unlock:
+	read_unlock_bh(&nf_conntrack_lock);
+	return ret;
+}
+#endif
+
 static int check(const char *tablename,
 		 const struct ipt_ip *ip,
 		 void *matchinfo,
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
index b1511b9..4d7f16b 100644
--- a/net/ipv4/netfilter/ipt_state.c
+++ b/net/ipv4/netfilter/ipt_state.c
@@ -10,7 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_state.h>
 
@@ -30,9 +30,9 @@ match(const struct sk_buff *skb,
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
 
-	if (skb->nfct == &ip_conntrack_untracked.ct_general)
+	if (nf_ct_is_untracked(skb))
 		statebit = IPT_STATE_UNTRACKED;
-	else if (!ip_conntrack_get(skb, &ctinfo))
+	else if (!nf_ct_get_ctinfo(skb, &ctinfo))
 		statebit = IPT_STATE_INVALID;
 	else
 		statebit = IPT_STATE_BIT(ctinfo);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
new file mode 100644
index 0000000..8202c1c
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -0,0 +1,571 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *	- move L3 protocol dependent part to this file.
+ * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *	- add get_features() to support various size of conntrack
+ *	  structures.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
+
+static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+			     struct nf_conntrack_tuple *tuple)
+{
+	u_int32_t _addrs[2], *ap;
+	ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
+				sizeof(u_int32_t) * 2, _addrs);
+	if (ap == NULL)
+		return 0;
+
+	tuple->src.u3.ip = ap[0];
+	tuple->dst.u3.ip = ap[1];
+
+	return 1;
+}
+
+static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
+			   const struct nf_conntrack_tuple *orig)
+{
+	tuple->src.u3.ip = orig->dst.u3.ip;
+	tuple->dst.u3.ip = orig->src.u3.ip;
+
+	return 1;
+}
+
+static int ipv4_print_tuple(struct seq_file *s,
+			    const struct nf_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
+		          NIPQUAD(tuple->src.u3.ip),
+			  NIPQUAD(tuple->dst.u3.ip));
+}
+
+static int ipv4_print_conntrack(struct seq_file *s,
+				const struct nf_conn *conntrack)
+{
+	return 0;
+}
+
+/* Returns new sk_buff, or NULL */
+static struct sk_buff *
+nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+	skb_orphan(skb);
+
+        local_bh_disable();
+        skb = ip_defrag(skb, user);
+        local_bh_enable();
+
+        if (skb)
+		ip_send_check(skb->nh.iph);
+
+        return skb;
+}
+
+static int
+ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
+	     u_int8_t *protonum)
+{
+	/* Never happen */
+	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+		if (net_ratelimit()) {
+			printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
+			(*pskb)->nh.iph->protocol, hooknum);
+		}
+		return -NF_DROP;
+	}
+
+	*dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	*protonum = (*pskb)->nh.iph->protocol;
+
+	return NF_ACCEPT;
+}
+
+int nat_module_is_loaded = 0;
+static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
+{
+	if (nat_module_is_loaded)
+		return NF_CT_F_NAT;
+
+	return NF_CT_F_BASIC;
+}
+
+static unsigned int ipv4_confirm(unsigned int hooknum,
+				 struct sk_buff **pskb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 int (*okfn)(struct sk_buff *))
+{
+	/* We've seen it coming out the other side: confirm it */
+	return nf_conntrack_confirm(pskb);
+}
+
+static unsigned int ipv4_conntrack_help(unsigned int hooknum,
+				      struct sk_buff **pskb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	/* This is where we call the helper: as the packet goes out. */
+	ct = nf_ct_get(*pskb, &ctinfo);
+	if (ct && ct->helper) {
+		unsigned int ret;
+		ret = ct->helper->help(pskb,
+				       (*pskb)->nh.raw - (*pskb)->data
+						       + (*pskb)->nh.iph->ihl*4,
+				       ct, ctinfo);
+		if (ret != NF_ACCEPT)
+			return ret;
+	}
+	return NF_ACCEPT;
+}
+
+static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+					  struct sk_buff **pskb,
+					  const struct net_device *in,
+					  const struct net_device *out,
+					  int (*okfn)(struct sk_buff *))
+{
+#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
+	/* Previously seen (loopback)?  Ignore.  Do this before
+	   fragment check. */
+	if ((*pskb)->nfct)
+		return NF_ACCEPT;
+#endif
+
+	/* Gather fragments. */
+	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+		*pskb = nf_ct_ipv4_gather_frags(*pskb,
+						hooknum == NF_IP_PRE_ROUTING ?
+						IP_DEFRAG_CONNTRACK_IN :
+						IP_DEFRAG_CONNTRACK_OUT);
+		if (!*pskb)
+			return NF_STOLEN;
+	}
+	return NF_ACCEPT;
+}
+
+static unsigned int ipv4_refrag(unsigned int hooknum,
+				struct sk_buff **pskb,
+				const struct net_device *in,
+				const struct net_device *out,
+				int (*okfn)(struct sk_buff *))
+{
+	struct rtable *rt = (struct rtable *)(*pskb)->dst;
+
+	/* We've seen it coming out the other side: confirm */
+	if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
+		return NF_DROP;
+
+	/* Local packets are never produced too large for their
+	   interface.  We degfragment them at LOCAL_OUT, however,
+	   so we have to refragment them here. */
+	if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
+	    !skb_shinfo(*pskb)->tso_size) {
+		/* No hook can be after us, so this should be OK. */
+		ip_fragment(*pskb, okfn);
+		return NF_STOLEN;
+	}
+	return NF_ACCEPT;
+}
+
+static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+				      struct sk_buff **pskb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	return nf_conntrack_in(PF_INET, hooknum, pskb);
+}
+
+static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+				         struct sk_buff **pskb,
+				         const struct net_device *in,
+				         const struct net_device *out,
+				         int (*okfn)(struct sk_buff *))
+{
+	/* root is playing with raw sockets. */
+	if ((*pskb)->len < sizeof(struct iphdr)
+	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+		if (net_ratelimit())
+			printk("ipt_hook: happy cracking.\n");
+		return NF_ACCEPT;
+	}
+	return nf_conntrack_in(PF_INET, hooknum, pskb);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+   make it the first hook. */
+static struct nf_hook_ops ipv4_conntrack_defrag_ops = {
+	.hook		= ipv4_conntrack_defrag,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_PRE_ROUTING,
+	.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ipv4_conntrack_in_ops = {
+	.hook		= ipv4_conntrack_in,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_PRE_ROUTING,
+	.priority	= NF_IP_PRI_CONNTRACK,
+};
+
+static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = {
+	.hook           = ipv4_conntrack_defrag,
+	.owner          = THIS_MODULE,
+	.pf             = PF_INET,
+	.hooknum        = NF_IP_LOCAL_OUT,
+	.priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ipv4_conntrack_local_out_ops = {
+	.hook		= ipv4_conntrack_local,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_LOCAL_OUT,
+	.priority	= NF_IP_PRI_CONNTRACK,
+};
+
+/* helpers */
+static struct nf_hook_ops ipv4_conntrack_helper_out_ops = {
+	.hook		= ipv4_conntrack_help,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_POST_ROUTING,
+	.priority	= NF_IP_PRI_CONNTRACK_HELPER,
+};
+
+static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
+	.hook		= ipv4_conntrack_help,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_LOCAL_IN,
+	.priority	= NF_IP_PRI_CONNTRACK_HELPER,
+};
+
+
+/* Refragmenter; last chance. */
+static struct nf_hook_ops ipv4_conntrack_out_ops = {
+	.hook		= ipv4_refrag,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_POST_ROUTING,
+	.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
+};
+
+static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
+	.hook		= ipv4_confirm,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_LOCAL_IN,
+	.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
+};
+
+#ifdef CONFIG_SYSCTL
+/* From nf_conntrack_proto_icmp.c */
+extern unsigned long nf_ct_icmp_timeout;
+static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
+
+static ctl_table nf_ct_sysctl_table[] = {
+	{
+		.ctl_name	= NET_NF_CONNTRACK_ICMP_TIMEOUT,
+		.procname	= "nf_conntrack_icmp_timeout",
+		.data		= &nf_ct_icmp_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+        { .ctl_name = 0 }
+};
+
+static ctl_table nf_ct_netfilter_table[] = {
+	{
+		.ctl_name       = NET_NETFILTER,
+		.procname       = "netfilter",
+		.mode           = 0555,
+		.child          = nf_ct_sysctl_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nf_ct_net_table[] = {
+	{
+		.ctl_name       = CTL_NET,
+		.procname       = "net",
+		.mode           = 0555,
+		.child          = nf_ct_netfilter_table,
+	},
+	{ .ctl_name = 0 }
+};
+#endif
+
+/* Fast function for those who don't want to parse /proc (and I don't
+   blame them). */
+/* Reversing the socket's dst/src point of view gives us the reply
+   mapping. */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+	
+	NF_CT_TUPLE_U_BLANK(&tuple);
+	tuple.src.u3.ip = inet->rcv_saddr;
+	tuple.src.u.tcp.port = inet->sport;
+	tuple.dst.u3.ip = inet->daddr;
+	tuple.dst.u.tcp.port = inet->dport;
+	tuple.src.l3num = PF_INET;
+	tuple.dst.protonum = IPPROTO_TCP;
+
+	/* We only do TCP at the moment: is there a better way? */
+	if (strcmp(sk->sk_prot->name, "TCP")) {
+		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
+		return -ENOPROTOOPT;
+	}
+
+	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
+		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
+		       *len, sizeof(struct sockaddr_in));
+		return -EINVAL;
+	}
+
+	h = nf_conntrack_find_get(&tuple, NULL);
+	if (h) {
+		struct sockaddr_in sin;
+		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+		sin.sin_family = AF_INET;
+		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+			.tuple.dst.u.tcp.port;
+		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+			.tuple.dst.u3.ip;
+
+		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
+		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+		nf_ct_put(ct);
+		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+			return -EFAULT;
+		else
+			return 0;
+	}
+	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
+	       NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
+	       NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
+	return -ENOENT;
+}
+
+static struct nf_sockopt_ops so_getorigdst = {
+	.pf		= PF_INET,
+	.get_optmin	= SO_ORIGINAL_DST,
+	.get_optmax	= SO_ORIGINAL_DST+1,
+	.get		= &getorigdst,
+};
+
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
+	.l3proto	 = PF_INET,
+	.name		 = "ipv4",
+	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
+	.invert_tuple	 = ipv4_invert_tuple,
+	.print_tuple	 = ipv4_print_tuple,
+	.print_conntrack = ipv4_print_conntrack,
+	.prepare	 = ipv4_prepare,
+	.get_features	 = ipv4_get_features,
+	.me		 = THIS_MODULE,
+};
+
+extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp;
+static int init_or_cleanup(int init)
+{
+	int ret = 0;
+
+	if (!init) goto cleanup;
+
+	ret = nf_register_sockopt(&so_getorigdst);
+	if (ret < 0) {
+		printk(KERN_ERR "Unable to register netfilter socket option\n");
+		goto cleanup_nothing;
+	}
+
+	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register tcp.\n");
+		goto cleanup_sockopt;
+	}
+
+	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register udp.\n");
+		goto cleanup_tcp;
+	}
+
+	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register icmp.\n");
+		goto cleanup_udp;
+	}
+
+	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register ipv4\n");
+		goto cleanup_icmp;
+	}
+
+	ret = nf_register_hook(&ipv4_conntrack_defrag_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n");
+		goto cleanup_ipv4;
+	}
+	ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n");
+		goto cleanup_defragops;
+	}
+
+	ret = nf_register_hook(&ipv4_conntrack_in_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register pre-routing hook.\n");
+		goto cleanup_defraglocalops;
+	}
+
+	ret = nf_register_hook(&ipv4_conntrack_local_out_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register local out hook.\n");
+		goto cleanup_inops;
+	}
+
+	ret = nf_register_hook(&ipv4_conntrack_helper_in_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register local helper hook.\n");
+		goto cleanup_inandlocalops;
+	}
+
+	ret = nf_register_hook(&ipv4_conntrack_helper_out_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n");
+		goto cleanup_helperinops;
+	}
+
+	ret = nf_register_hook(&ipv4_conntrack_out_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register post-routing hook.\n");
+		goto cleanup_helperoutops;
+	}
+
+	ret = nf_register_hook(&ipv4_conntrack_local_in_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv4: can't register local in hook.\n");
+		goto cleanup_inoutandlocalops;
+	}
+
+#ifdef CONFIG_SYSCTL
+	nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
+	if (nf_ct_ipv4_sysctl_header == NULL) {
+		printk("nf_conntrack: can't register to sysctl.\n");
+		ret = -ENOMEM;
+		goto cleanup_localinops;
+	}
+#endif
+
+	/* For use by REJECT target */
+	ip_ct_attach = __nf_conntrack_attach;
+
+	return ret;
+
+ cleanup:
+	synchronize_net();
+	ip_ct_attach = NULL;
+#ifdef CONFIG_SYSCTL
+ 	unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
+ cleanup_localinops:
+#endif
+	nf_unregister_hook(&ipv4_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
+	nf_unregister_hook(&ipv4_conntrack_out_ops);
+ cleanup_helperoutops:
+	nf_unregister_hook(&ipv4_conntrack_helper_out_ops);
+ cleanup_helperinops:
+	nf_unregister_hook(&ipv4_conntrack_helper_in_ops);
+ cleanup_inandlocalops:
+	nf_unregister_hook(&ipv4_conntrack_local_out_ops);
+ cleanup_inops:
+	nf_unregister_hook(&ipv4_conntrack_in_ops);
+ cleanup_defraglocalops:
+	nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops);
+ cleanup_defragops:
+	nf_unregister_hook(&ipv4_conntrack_defrag_ops);
+ cleanup_ipv4:
+	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+ cleanup_icmp:
+	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
+ cleanup_udp:
+	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
+ cleanup_tcp:
+	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
+ cleanup_sockopt:
+	nf_unregister_sockopt(&so_getorigdst);
+ cleanup_nothing:
+	return ret;
+}
+
+MODULE_LICENSE("GPL");
+
+static int __init init(void)
+{
+	need_nf_conntrack();
+	return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+void need_ip_conntrack(void)
+{
+}
+
+EXPORT_SYMBOL(need_ip_conntrack);
+EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 0000000..7ddb5c0
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -0,0 +1,301 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *	- enable working with Layer 3 protocol independent connection tracking.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+unsigned long nf_ct_icmp_timeout = 30*HZ;
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int icmp_pkt_to_tuple(const struct sk_buff *skb,
+			     unsigned int dataoff,
+			     struct nf_conntrack_tuple *tuple)
+{
+	struct icmphdr _hdr, *hp;
+
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return 0;
+
+	tuple->dst.u.icmp.type = hp->type;
+	tuple->src.u.icmp.id = hp->un.echo.id;
+	tuple->dst.u.icmp.code = hp->code;
+
+	return 1;
+}
+
+static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+			     const struct nf_conntrack_tuple *orig)
+{
+	/* Add 1; spaces filled with 0. */
+	static u_int8_t invmap[]
+		= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+		    [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+		    [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+		    [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+		    [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+		    [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+		    [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+		    [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
+
+	if (orig->dst.u.icmp.type >= sizeof(invmap)
+	    || !invmap[orig->dst.u.icmp.type])
+		return 0;
+
+	tuple->src.u.icmp.id = orig->src.u.icmp.id;
+	tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+	return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int icmp_print_tuple(struct seq_file *s,
+			    const struct nf_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "type=%u code=%u id=%u ",
+			  tuple->dst.u.icmp.type,
+			  tuple->dst.u.icmp.code,
+			  ntohs(tuple->src.u.icmp.id));
+}
+
+/* Print out the private part of the conntrack. */
+static int icmp_print_conntrack(struct seq_file *s,
+				const struct nf_conn *conntrack)
+{
+	return 0;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmp_packet(struct nf_conn *ct,
+		       const struct sk_buff *skb,
+		       unsigned int dataoff,
+		       enum ip_conntrack_info ctinfo,
+		       int pf,
+		       unsigned int hooknum)
+{
+	/* Try to delete connection immediately after all replies:
+           won't actually vanish as we still have skb, and del_timer
+           means this will only run once even if count hits zero twice
+           (theoretically possible with SMP) */
+	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+		if (atomic_dec_and_test(&ct->proto.icmp.count)
+		    && del_timer(&ct->timeout))
+			ct->timeout.function((unsigned long)ct);
+	} else {
+		atomic_inc(&ct->proto.icmp.count);
+		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
+	}
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int icmp_new(struct nf_conn *conntrack,
+		    const struct sk_buff *skb, unsigned int dataoff)
+{
+	static u_int8_t valid_new[]
+		= { [ICMP_ECHO] = 1,
+		    [ICMP_TIMESTAMP] = 1,
+		    [ICMP_INFO_REQUEST] = 1,
+		    [ICMP_ADDRESS] = 1 };
+
+	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
+	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
+		/* Can't create a new ICMP `conn' with this. */
+		DEBUGP("icmp: can't create new conn with type %u\n",
+		       conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+		NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
+		return 0;
+	}
+	atomic_set(&conntrack->proto.icmp.count, 0);
+	return 1;
+}
+
+extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
+static int
+icmp_error_message(struct sk_buff *skb,
+                 enum ip_conntrack_info *ctinfo,
+                 unsigned int hooknum)
+{
+	struct nf_conntrack_tuple innertuple, origtuple;
+	struct {
+		struct icmphdr icmp;
+		struct iphdr ip;
+	} _in, *inside;
+	struct nf_conntrack_protocol *innerproto;
+	struct nf_conntrack_tuple_hash *h;
+	int dataoff;
+
+	NF_CT_ASSERT(skb->nfct == NULL);
+
+	/* Not enough header? */
+	inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+	if (inside == NULL)
+		return -NF_ACCEPT;
+
+	/* Ignore ICMP's containing fragments (shouldn't happen) */
+	if (inside->ip.frag_off & htons(IP_OFFSET)) {
+		DEBUGP("icmp_error_message: fragment of proto %u\n",
+		       inside->ip.protocol);
+		return -NF_ACCEPT;
+	}
+
+	innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol);
+	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
+	/* Are they talking about one of our connections? */
+	if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
+			     inside->ip.protocol, &origtuple,
+			     &nf_conntrack_l3proto_ipv4, innerproto)) {
+		DEBUGP("icmp_error_message: ! get_tuple p=%u",
+		       inside->ip.protocol);
+		return -NF_ACCEPT;
+	}
+
+        /* Ordinarily, we'd expect the inverted tupleproto, but it's
+           been preserved inside the ICMP. */
+        if (!nf_ct_invert_tuple(&innertuple, &origtuple,
+				&nf_conntrack_l3proto_ipv4, innerproto)) {
+		DEBUGP("icmp_error_message: no match\n");
+		return -NF_ACCEPT;
+	}
+
+	*ctinfo = IP_CT_RELATED;
+
+	h = nf_conntrack_find_get(&innertuple, NULL);
+	if (!h) {
+		/* Locally generated ICMPs will match inverted if they
+		   haven't been SNAT'ed yet */
+		/* FIXME: NAT code has to handle half-done double NAT --RR */
+		if (hooknum == NF_IP_LOCAL_OUT)
+			h = nf_conntrack_find_get(&origtuple, NULL);
+
+		if (!h) {
+			DEBUGP("icmp_error_message: no match\n");
+			return -NF_ACCEPT;
+		}
+
+		/* Reverse direction from that found */
+		if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+			*ctinfo += IP_CT_IS_REPLY;
+	} else {
+		if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+			*ctinfo += IP_CT_IS_REPLY;
+	}
+
+        /* Update skb to refer to this connection */
+        skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+        skb->nfctinfo = *ctinfo;
+        return -NF_ACCEPT;
+}
+
+/* Small and modified version of icmp_rcv */
+static int
+icmp_error(struct sk_buff *skb, unsigned int dataoff,
+	   enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
+{
+	struct icmphdr _ih, *icmph;
+
+	/* Not enough header? */
+	icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+	if (icmph == NULL) {
+		if (LOG_INVALID(IPPROTO_ICMP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_icmp: short packet ");
+		return -NF_ACCEPT;
+	}
+
+	/* See ip_conntrack_proto_tcp.c */
+	if (hooknum != NF_IP_PRE_ROUTING)
+		goto checksum_skipped;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_HW:
+		if (!(u16)csum_fold(skb->csum))
+			break;
+		if (LOG_INVALID(IPPROTO_ICMP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_icmp: bad HW ICMP checksum ");
+		return -NF_ACCEPT;
+	case CHECKSUM_NONE:
+		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+			if (LOG_INVALID(IPPROTO_ICMP))
+				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+					      NULL,
+					      "nf_ct_icmp: bad ICMP checksum ");
+			return -NF_ACCEPT;
+		}
+	default:
+		break;
+	}
+
+checksum_skipped:
+	/*
+	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
+	 *
+	 *	RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
+	 *		  discarded.
+	 */
+	if (icmph->type > NR_ICMP_TYPES) {
+		if (LOG_INVALID(IPPROTO_ICMP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_icmp: invalid ICMP type ");
+		return -NF_ACCEPT;
+	}
+
+	/* Need to track icmp error message? */
+	if (icmph->type != ICMP_DEST_UNREACH
+	    && icmph->type != ICMP_SOURCE_QUENCH
+	    && icmph->type != ICMP_TIME_EXCEEDED
+	    && icmph->type != ICMP_PARAMETERPROB
+	    && icmph->type != ICMP_REDIRECT)
+		return NF_ACCEPT;
+
+	return icmp_error_message(skb, ctinfo, hooknum);
+}
+
+struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
+{
+	.list			= { NULL, NULL },
+	.l3proto		= PF_INET,
+	.proto			= IPPROTO_ICMP,
+	.name			= "icmp",
+	.pkt_to_tuple		= icmp_pkt_to_tuple,
+	.invert_tuple		= icmp_invert_tuple,
+	.print_tuple		= icmp_print_tuple,
+	.print_conntrack	= icmp_print_conntrack,
+	.packet			= icmp_packet,
+	.new			= icmp_new,
+	.error			= icmp_error,
+	.destroy		= NULL,
+	.me			= NULL
+};
+
+EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6526856..01444a0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
 		.proc_handler	= &proc_tcp_congestion_control,
 		.strategy	= &sysctl_tcp_congestion_control,
 	},
+	{
+		.ctl_name	= NET_TCP_ABC,
+		.procname	= "tcp_abc",
+		.data		= &sysctl_tcp_abc,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 
 	{ .ctl_name = 0 }
 };
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3f0013..9ac7a4f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	} else if (tcp_need_reset(old_state) ||
 		   (tp->snd_nxt != tp->write_seq &&
 		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
-		/* The last check adjusts for discrepance of Linux wrt. RFC
+		/* The last check adjusts for discrepancy of Linux wrt. RFC
 		 * states
 		 */
 		tcp_send_active_reset(sk, gfp_any());
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->packets_out = 0;
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_cnt = 0;
+	tp->bytes_acked = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
@@ -2112,7 +2113,6 @@ void __init tcp_init(void)
 		sysctl_tcp_max_orphans >>= (3 - order);
 		sysctl_max_syn_backlog = 128;
 	}
-	tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
 
 	sysctl_tcp_mem[0] =  768 << order;
 	sysctl_tcp_mem[1] = 1024 << order;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index ae35e06..1d0cd86 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack,
 
 	bictcp_low_utilization(sk, data_acked);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		/* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
 		bictcp_update(ca, tp->snd_cwnd);
 
-                /* In dangerous area, increase slowly.
+		/* In dangerous area, increase slowly.
 		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
 		 */
 		if (tp->snd_cwnd_cnt >= ca->cnt) {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index bbf2d66..c7cc62c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-        if (tp->snd_cwnd <= tp->snd_ssthresh) {
-                /* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
-                /* In dangerous area, increase slowly.
-		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
-		 */
-		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-			tp->snd_cwnd_cnt = 0;
-		} else
-			tp->snd_cwnd_cnt++;
-	}
+	/* In "safe" area, increase. */
+        if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+
+ 	/* In dangerous area, increase slowly. */
+	else if (sysctl_tcp_abc) {
+ 		/* RFC3465: Apppriate Byte Count
+ 		 * increase once for each full cwnd acked
+ 		 */
+ 		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+ 			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+ 			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ 				tp->snd_cwnd++;
+ 		}
+ 	} else {
+ 		/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+ 		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ 			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ 				tp->snd_cwnd++;
+ 			tp->snd_cwnd_cnt = 0;
+ 		} else
+ 			tp->snd_cwnd_cnt++;
+ 	}
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6acc04b..82b3c18 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk)
 }
 
 static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
-			     u32 in_flight, int good)
+			     u32 in_flight, u32 pkts_acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hstcp *ca = inet_csk_ca(sk);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
 		/* Update AIMD parameters */
 		if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
 			while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index e47b379..3284cfb 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-        if (tp->snd_cwnd <= tp->snd_ssthresh) {
-                /* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
+        if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
+
 		measure_rtt(sk);
 
 		/* keep track of number of round-trip times since last backoff event */
@@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 			htcp_alpha_update(ca);
 		}
 
-                /* In dangerous area, increase slowly.
+		/* In dangerous area, increase slowly.
 		 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
 		 */
 		if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) {
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 77add63..40dbb38 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 		ca->minrtt = tp->srtt;
 	}
 
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
 	if (!ca->hybla_en)
 		return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
 
-	if (in_flight < tp->snd_cwnd)
-		return;
-
 	if (ca->rho == 0)
 		hybla_recalc_param(sk);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e98b57..40a26b7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -42,7 +42,7 @@
  *		Andi Kleen	:	Moved open_request checking here
  *					and process RSTs for open_requests.
  *		Andi Kleen	:	Better prune_queue, and other fixes.
- *		Andrey Savochkin:	Fix RTT measurements in the presnce of
+ *		Andrey Savochkin:	Fix RTT measurements in the presence of
  *					timestamps.
  *		Andrey Savochkin:	Check sequence numbers correctly when
  *					removing SACKs due to in sequence incoming
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
 int sysctl_tcp_nometrics_save;
 
 int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
@@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
  *   of receiver window. Check #2.
  *
  * The scheme does not work when sender sends good segments opening
- * window and then starts to feed us spagetti. But it should work
+ * window and then starts to feed us spaghetti. But it should work
  * in common situations. Otherwise, we have to rely on queue collapsing.
  */
 
@@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 {
 	/* Optimize this! */
 	int truesize = tcp_win_from_space(skb->truesize)/2;
-	int window = tcp_full_space(sk)/2;
+	int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
 
 	while (tp->rcv_ssthresh <= window) {
 		if (truesize <= skb->len)
@@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
 
 	/* Try to select rcvbuf so that 4 mss-sized segments
-	 * will fit to window and correspoding skbs will fit to our rcvbuf.
+	 * will fit to window and corresponding skbs will fit to our rcvbuf.
 	 * (was 3; 4 is minimum to allow fast retransmit to work.)
 	 */
 	while (tcp_win_from_space(rcvmem) < tp->advmss)
@@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
 }
 
-/* 4. Try to fixup all. It is made iimediately after connection enters
+/* 4. Try to fixup all. It is made immediately after connection enters
  *    established state.
  */
 static void tcp_init_buffer_space(struct sock *sk)
@@ -326,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk)
 static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct sk_buff *skb;
-	unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
-	int ofo_win = 0;
 
 	icsk->icsk_ack.quick = 0;
 
-	skb_queue_walk(&tp->out_of_order_queue, skb) {
-		ofo_win += skb->len;
-	}
-
-	/* If overcommit is due to out of order segments,
-	 * do not clamp window. Try to expand rcvbuf instead.
-	 */
-	if (ofo_win) {
-		if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
-		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-		    !tcp_memory_pressure &&
-		    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
-			sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-					    sysctl_tcp_rmem[2]);
+	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+	    !tcp_memory_pressure &&
+	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
+				    sysctl_tcp_rmem[2]);
 	}
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
-		app_win += ofo_win;
-		if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
-			app_win >>= 1;
-		if (app_win > icsk->icsk_ack.rcv_mss)
-			app_win -= icsk->icsk_ack.rcv_mss;
-		app_win = max(app_win, 2U*tp->advmss);
-
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
-	}
 }
 
 /* Receiver "autotuning" code.
@@ -385,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 		 * are stalled on filesystem I/O.
 		 *
 		 * Also, since we are only going for a minimum in the
-		 * non-timestamp case, we do not smoothe things out
-		 * else with timestamps disabled convergance takes too
+		 * non-timestamp case, we do not smoother things out
+		 * else with timestamps disabled convergence takes too
 		 * long.
 		 */
 		if (!win_dep) {
@@ -395,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 		} else if (m < new_sample)
 			new_sample = m << 3;
 	} else {
-		/* No previous mesaure. */
+		/* No previous measure. */
 		new_sample = m << 3;
 	}
 
@@ -524,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 			if (icsk->icsk_ack.ato > icsk->icsk_rto)
 				icsk->icsk_ack.ato = icsk->icsk_rto;
 		} else if (m > icsk->icsk_rto) {
-			/* Too long gap. Apparently sender falled to
+			/* Too long gap. Apparently sender failed to
 			 * restart window, so that we send ACKs quickly.
 			 */
 			tcp_incr_quickack(sk);
@@ -548,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
  * To save cycles in the RFC 1323 implementation it was better to break
  * it up into three procedures. -- erics
  */
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
+static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 	long m = mrtt; /* RTT */
 
 	/*	The following amusing code comes from Jacobson's
@@ -565,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
 	 *
 	 * Funny. This algorithm seems to be very broken.
 	 * These formulae increase RTO, when it should be decreased, increase
-	 * too slowly, when it should be incresed fastly, decrease too fastly
+	 * too slowly, when it should be increased fastly, decrease too fastly
 	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
@@ -610,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
 		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
 		tp->rtt_seq = tp->snd_nxt;
 	}
-
-	if (icsk->icsk_ca_ops->rtt_sample)
-		icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
 }
 
 /* Calculate rto without backoff.  This is the second half of Van Jacobson's
@@ -629,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk)
 	 *    at least by solaris and freebsd. "Erratic ACKs" has _nothing_
 	 *    to do with delayed acks, because at cwnd>2 true delack timeout
 	 *    is invisible. Actually, Linux-2.4 also generates erratic
-	 *    ACKs in some curcumstances.
+	 *    ACKs in some circumstances.
 	 */
 	inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
 
 	/* 2. Fixups made earlier cannot be right.
 	 *    If we do not estimate RTO correctly without them,
 	 *    all the algo is pure shit and should be replaced
-	 *    with correct one. It is exaclty, which we pretend to do.
+	 *    with correct one. It is exactly, which we pretend to do.
 	 */
 }
 
@@ -794,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk)
 	 * to make it more realistic.
 	 *
 	 * A bit of theory. RTT is time passed after "normal" sized packet
-	 * is sent until it is ACKed. In normal curcumstances sending small
+	 * is sent until it is ACKed. In normal circumstances sending small
 	 * packets force peer to delay ACKs and calculation is correct too.
 	 * The algorithm is adaptive and, provided we follow specs, it
 	 * NEVER underestimate RTT. BUT! If peer tries to make some clever
@@ -919,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	int prior_fackets;
 	u32 lost_retrans = 0;
 	int flag = 0;
+	int dup_sack = 0;
 	int i;
 
 	if (!tp->sacked_out)
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
 
-	for (i=0; i<num_sacks; i++, sp++) {
-		struct sk_buff *skb;
-		__u32 start_seq = ntohl(sp->start_seq);
-		__u32 end_seq = ntohl(sp->end_seq);
-		int fack_count = 0;
-		int dup_sack = 0;
+	/* SACK fastpath:
+	 * if the only SACK change is the increase of the end_seq of
+	 * the first block then only apply that SACK block
+	 * and use retrans queue hinting otherwise slowpath */
+	flag = 1;
+	for (i = 0; i< num_sacks; i++) {
+		__u32 start_seq = ntohl(sp[i].start_seq);
+		__u32 end_seq =	 ntohl(sp[i].end_seq);
+
+		if (i == 0){
+			if (tp->recv_sack_cache[i].start_seq != start_seq)
+				flag = 0;
+		} else {
+			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
+			    (tp->recv_sack_cache[i].end_seq != end_seq))
+				flag = 0;
+		}
+		tp->recv_sack_cache[i].start_seq = start_seq;
+		tp->recv_sack_cache[i].end_seq = end_seq;
 
 		/* Check for D-SACK. */
 		if (i == 0) {
@@ -962,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			if (before(ack, prior_snd_una - tp->max_window))
 				return 0;
 		}
+	}
+
+	if (flag)
+		num_sacks = 1;
+	else {
+		int j;
+		tp->fastpath_skb_hint = NULL;
+
+		/* order SACK blocks to allow in order walk of the retrans queue */
+		for (i = num_sacks-1; i > 0; i--) {
+			for (j = 0; j < i; j++){
+				if (after(ntohl(sp[j].start_seq),
+					  ntohl(sp[j+1].start_seq))){
+					sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
+					sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
+					sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
+					sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
+				}
+
+			}
+		}
+	}
+
+	/* clear flag as used for different purpose in following code */
+	flag = 0;
+
+	for (i=0; i<num_sacks; i++, sp++) {
+		struct sk_buff *skb;
+		__u32 start_seq = ntohl(sp->start_seq);
+		__u32 end_seq = ntohl(sp->end_seq);
+		int fack_count;
+
+		/* Use SACK fastpath hint if valid */
+		if (tp->fastpath_skb_hint) {
+			skb = tp->fastpath_skb_hint;
+			fack_count = tp->fastpath_cnt_hint;
+		} else {
+			skb = sk->sk_write_queue.next;
+			fack_count = 0;
+		}
 
 		/* Event "B" in the comment above. */
 		if (after(end_seq, tp->high_seq))
 			flag |= FLAG_DATA_LOST;
 
-		sk_stream_for_retrans_queue(skb, sk) {
+		sk_stream_for_retrans_queue_from(skb, sk) {
 			int in_sack, pcount;
 			u8 sacked;
 
+			tp->fastpath_skb_hint = skb;
+			tp->fastpath_cnt_hint = fack_count;
+
 			/* The retransmission queue is always in order, so
 			 * we can short-circuit the walk early.
 			 */
@@ -1045,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
 						tp->lost_out -= tcp_skb_pcount(skb);
 						tp->retrans_out -= tcp_skb_pcount(skb);
+
+						/* clear lost hint */
+						tp->retransmit_skb_hint = NULL;
 					}
 				} else {
 					/* New sack for not retransmitted frame,
@@ -1057,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 					if (sacked & TCPCB_LOST) {
 						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 						tp->lost_out -= tcp_skb_pcount(skb);
+
+						/* clear lost hint */
+						tp->retransmit_skb_hint = NULL;
 					}
 				}
 
@@ -1080,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
+				tp->retransmit_skb_hint = NULL;
 			}
 		}
 	}
@@ -1107,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
 
+				/* clear lost hint */
+				tp->retransmit_skb_hint = NULL;
+
 				if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
 					tp->lost_out += tcp_skb_pcount(skb);
 					TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1214,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->frto_highmark;
 	TCP_ECN_queue_cwr(tp);
+
+	clear_all_retrans_hints(tp);
 }
 
 void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1251,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tp->snd_cwnd_cnt   = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 
+	tp->bytes_acked = 0;
 	tcp_clear_retrans(tp);
 
 	/* Push undo marker, if it was plain RTO and nothing
@@ -1279,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
+
+	clear_all_retrans_hints(tp);
 }
 
 static int tcp_check_sack_reneging(struct sock *sk)
@@ -1503,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 			       int packets, u32 high_seq)
 {
 	struct sk_buff *skb;
-	int cnt = packets;
+	int cnt;
 
-	BUG_TRAP(cnt <= tp->packets_out);
+	BUG_TRAP(packets <= tp->packets_out);
+	if (tp->lost_skb_hint) {
+		skb = tp->lost_skb_hint;
+		cnt = tp->lost_cnt_hint;
+	} else {
+		skb = sk->sk_write_queue.next;
+		cnt = 0;
+	}
 
-	sk_stream_for_retrans_queue(skb, sk) {
-		cnt -= tcp_skb_pcount(skb);
-		if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+	sk_stream_for_retrans_queue_from(skb, sk) {
+		/* TODO: do this better */
+		/* this is not the most efficient way to do this... */
+		tp->lost_skb_hint = skb;
+		tp->lost_cnt_hint = cnt;
+		cnt += tcp_skb_pcount(skb);
+		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
 			break;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
+
+			/* clear xmit_retransmit_queue hints
+			 *  if this is beyond hint */
+			if(tp->retransmit_skb_hint != NULL &&
+			   before(TCP_SKB_CB(skb)->seq,
+				  TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
+
+				tp->retransmit_skb_hint = NULL;
+			}
 		}
 	}
 	tcp_sync_left_out(tp);
@@ -1540,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 	if (tcp_head_timedout(sk, tp)) {
 		struct sk_buff *skb;
 
-		sk_stream_for_retrans_queue(skb, sk) {
-			if (tcp_skb_timedout(sk, skb) &&
-			    !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
+			: sk->sk_write_queue.next;
+
+		sk_stream_for_retrans_queue_from(skb, sk) {
+			if (!tcp_skb_timedout(sk, skb))
+				break;
+
+			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 				tp->lost_out += tcp_skb_pcount(skb);
+
+				/* clear xmit_retrans hint */
+				if (tp->retransmit_skb_hint &&
+				    before(TCP_SKB_CB(skb)->seq,
+					   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+
+					tp->retransmit_skb_hint = NULL;
 			}
 		}
+
+		tp->scoreboard_skb_hint = skb;
+
 		tcp_sync_left_out(tp);
 	}
 }
@@ -1626,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 	}
 	tcp_moderate_cwnd(tp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
+
+	/* There is something screwy going on with the retrans hints after
+	   an undo */
+	clear_all_retrans_hints(tp);
 }
 
 static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1709,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
 		sk_stream_for_retrans_queue(skb, sk) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
+
+		clear_all_retrans_hints(tp);
+
 		DBGUNDO(sk, tp, "partial loss");
 		tp->lost_out = 0;
 		tp->left_out = tp->sacked_out;
@@ -1908,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 			TCP_ECN_queue_cwr(tp);
 		}
 
+		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 	}
@@ -1919,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 }
 
 /* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Superceeds RFC1323)
+ * with this code. (Supersedes RFC1323)
  */
-static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
+static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
 {
 	/* RTTM Rule: A TSecr value received in a segment is used to
 	 * update the averaged RTT measurement only if the segment
@@ -1932,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
 	 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
 	 *
 	 * Changed: reset backoff as soon as we see the first valid sample.
-	 * If we do not, we get strongly overstimated rto. With timestamps
+	 * If we do not, we get strongly overestimated rto. With timestamps
 	 * samples are accepted even from very old segments: f.e., when rtt=1
 	 * increases to 8, we retransmit 5 times and after 8 seconds delayed
 	 * answer arrives rto becomes 120 seconds! If at least one of segments
@@ -1940,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
 	 */
 	struct tcp_sock *tp = tcp_sk(sk);
 	const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
-	tcp_rtt_estimator(sk, seq_rtt, usrtt);
+	tcp_rtt_estimator(sk, seq_rtt);
 	tcp_set_rto(sk);
 	inet_csk(sk)->icsk_backoff = 0;
 	tcp_bound_rto(sk);
 }
 
-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag)
+static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
 {
 	/* We don't have a timestamp. Can only use
 	 * packets that are not retransmitted to determine
@@ -1960,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag
 	if (flag & FLAG_RETRANS_DATA_ACKED)
 		return;
 
-	tcp_rtt_estimator(sk, seq_rtt, usrtt);
+	tcp_rtt_estimator(sk, seq_rtt);
 	tcp_set_rto(sk);
 	inet_csk(sk)->icsk_backoff = 0;
 	tcp_bound_rto(sk);
 }
 
 static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      const s32 seq_rtt, u32 *usrtt)
+				      const s32 seq_rtt)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
-		tcp_ack_saw_tstamp(sk, usrtt, flag);
+		tcp_ack_saw_tstamp(sk, flag);
 	else if (seq_rtt >= 0)
-		tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag);
+		tcp_ack_no_tstamp(sk, seq_rtt, flag);
 }
 
 static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
@@ -2054,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 	return acked;
 }
 
+static inline u32 tcp_usrtt(const struct sk_buff *skb)
+{
+	struct timeval tv, now;
+
+	do_gettimeofday(&now);
+	skb_get_timestamp(skb, &tv);
+	return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
+}
 
 /* Remove acknowledged frames from the retransmission queue. */
-static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt)
+static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_buff *skb;
 	__u32 now = tcp_time_stamp;
 	int acked = 0;
 	__s32 seq_rtt = -1;
-	struct timeval usnow;
 	u32 pkts_acked = 0;
-
-	if (seq_usrtt)
-		do_gettimeofday(&usnow);
+	void (*rtt_sample)(struct sock *sk, u32 usrtt)
+		= icsk->icsk_ca_ops->rtt_sample;
 
 	while ((skb = skb_peek(&sk->sk_write_queue)) &&
 	       skb != sk->sk_send_head) {
@@ -2107,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
 					tp->retrans_out -= tcp_skb_pcount(skb);
 				acked |= FLAG_RETRANS_DATA_ACKED;
 				seq_rtt = -1;
-			} else if (seq_rtt < 0)
+			} else if (seq_rtt < 0) {
 				seq_rtt = now - scb->when;
-			if (seq_usrtt) {
-				struct timeval tv;
-			
-				skb_get_timestamp(skb, &tv);
-				*seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000
-					+ (usnow.tv_usec - tv.tv_usec);
+				if (rtt_sample)
+					(*rtt_sample)(sk, tcp_usrtt(skb));
 			}
-
 			if (sacked & TCPCB_SACKED_ACKED)
 				tp->sacked_out -= tcp_skb_pcount(skb);
 			if (sacked & TCPCB_LOST)
@@ -2126,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
 				    !before(scb->end_seq, tp->snd_up))
 					tp->urg_mode = 0;
 			}
-		} else if (seq_rtt < 0)
+		} else if (seq_rtt < 0) {
 			seq_rtt = now - scb->when;
+			if (rtt_sample)
+				(*rtt_sample)(sk, tcp_usrtt(skb));
+		}
 		tcp_dec_pcount_approx(&tp->fackets_out, skb);
 		tcp_packets_out_dec(tp, skb);
 		__skb_unlink(skb, &sk->sk_write_queue);
 		sk_stream_free_skb(sk, skb);
+		clear_all_retrans_hints(tp);
 	}
 
 	if (acked&FLAG_ACKED) {
-		const struct inet_connection_sock *icsk = inet_csk(sk);
-		tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
+		tcp_ack_update_rtt(sk, acked, seq_rtt);
 		tcp_ack_packets_out(sk, tp);
 
 		if (icsk->icsk_ca_ops->pkts_acked)
@@ -2284,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 	}
 
 	/* F-RTO affects on two new ACKs following RTO.
-	 * At latest on third ACK the TCP behavor is back to normal.
+	 * At latest on third ACK the TCP behavior is back to normal.
 	 */
 	tp->frto_counter = (tp->frto_counter + 1) % 3;
 }
@@ -2299,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	u32 prior_in_flight;
 	s32 seq_rtt;
-	s32 seq_usrtt = 0;
 	int prior_packets;
 
 	/* If the ack is newer than sent or older than previous acks
@@ -2311,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	if (before(ack, prior_snd_una))
 		goto old_ack;
 
+	if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
+		tp->bytes_acked += ack - prior_snd_una;
+
 	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
 		/* Window is constant, pure forward advance.
 		 * No more checks are required.
@@ -2352,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	prior_in_flight = tcp_packets_in_flight(tp);
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, &seq_rtt,
-				    icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
+	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
 
 	if (tp->frto_counter)
 		tcp_process_frto(sk, prior_snd_una);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
-		/* Advanve CWND, if state allows this. */
+		/* Advance CWND, if state allows this. */
 		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack,  seq_rtt, prior_in_flight, 0);
 		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
@@ -3148,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 {
 	struct sk_buff *skb;
 
-	/* First, check that queue is collapsable and find
+	/* First, check that queue is collapsible and find
 	 * the point where collapsing can be useful. */
 	for (skb = head; skb != tail; ) {
 		/* No new bits? It is possible on ofo queue. */
@@ -3456,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
 
 /*
  *	This routine is only called when we have urgent data
- *	signalled. Its the 'slow' part of tcp_urg. It could be
+ *	signaled. Its the 'slow' part of tcp_urg. It could be
  *	moved inline now as tcp_urg is only called from one
  *	place. We handle URGent data wrong. We have to - as
  *	BSD still doesn't use the correction from RFC961.
@@ -3501,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 	 * urgent. To do this requires some care. We cannot just ignore
 	 * tp->copied_seq since we would read the last urgent byte again
 	 * as data, nor can we alter copied_seq until this data arrives
-	 * or we break the sematics of SIOCATMARK (and thus sockatmark())
+	 * or we break the semantics of SIOCATMARK (and thus sockatmark())
 	 *
 	 * NOTE. Double Dutch. Rendering to plain English: author of comment
 	 * above did something sort of 	send("A", MSG_OOB); send("B", MSG_OOB);
@@ -3646,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	tp->rx_opt.saw_tstamp = 0;
 
 	/*	pred_flags is 0xS?10 << 16 + snd_wnd
-	 *	if header_predition is to be made
+	 *	if header_prediction is to be made
 	 *	'S' will always be tp->tcp_header_len >> 2
 	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
 	 *  turn it off	(when there are holes in the receive 
@@ -4242,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 */
 				if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 				    !tp->srtt)
-					tcp_ack_saw_tstamp(sk, NULL, 0);
+					tcp_ack_saw_tstamp(sk, 0);
 
 				if (tp->rx_opt.tstamp_ok)
 					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -4372,6 +4471,7 @@ discard:
 
 EXPORT_SYMBOL(sysctl_tcp_ecn);
 EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_abc);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c85819d..4d5021e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -39,7 +39,7 @@
  *					request_sock handling and moved
  *					most of it into the af independent code.
  *					Added tail drop and some other bugfixes.
- *					Added new listen sematics.
+ *					Added new listen semantics.
  *		Mike McLagan	:	Routing by source
  *	Juan Jose Ciarlante:		ip_dynaddr bits
  *		Andi Kleen:		various fixes.
@@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
 	.lhash_lock	= RW_LOCK_UNLOCKED,
 	.lhash_users	= ATOMIC_INIT(0),
 	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
-	.portalloc_lock	= SPIN_LOCK_UNLOCKED,
-	.port_rover	= 1024 - 1,
 };
 
 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -825,8 +823,7 @@ out:
  */
 static void tcp_v4_reqsk_destructor(struct request_sock *req)
 {
-	if (inet_rsk(req)->opt)
-		kfree(inet_rsk(req)->opt);
+	kfree(inet_rsk(req)->opt);
 }
 
 static inline void syn_flood_warning(struct sk_buff *skb)
@@ -1113,24 +1110,18 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 static int tcp_v4_checksum_init(struct sk_buff *skb)
 {
 	if (skb->ip_summed == CHECKSUM_HW) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
 		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
-				  skb->nh.iph->daddr, skb->csum))
+				  skb->nh.iph->daddr, skb->csum)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			return 0;
-
-		LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n");
-		skb->ip_summed = CHECKSUM_NONE;
+		}
 	}
+
+	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
+				       skb->len, IPPROTO_TCP, 0);
+
 	if (skb->len <= 76) {
-		if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
-				 skb->nh.iph->daddr,
-				 skb_checksum(skb, 0, skb->len, 0)))
-			return -1;
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else {
-		skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
-					  skb->nh.iph->saddr,
-					  skb->nh.iph->daddr, 0);
+		return __skb_checksum_complete(skb);
 	}
 	return 0;
 }
@@ -1219,10 +1210,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 	/* An explanation is required here, I think.
 	 * Packet length and doff are validated by header prediction,
-	 * provided case of th->doff==0 is elimineted.
+	 * provided case of th->doff==0 is eliminated.
 	 * So, we defer the checks. */
 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	     tcp_v4_checksum_init(skb) < 0))
+	     tcp_v4_checksum_init(skb)))
 		goto bad_packet;
 
 	th = skb->h.th;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b1a63b2..1b66a2a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -158,7 +158,7 @@ kill_with_rst:
 		/* I am shamed, but failed to make it more elegant.
 		 * Yes, it is direct reference to IP, which is impossible
 		 * to generalize to IPv6. Taking into account that IPv6
-		 * do not undertsnad recycling in any case, it not
+		 * do not understand recycling in any case, it not
 		 * a big problem in practice. --ANK */
 		if (tw->tw_family == AF_INET &&
 		    tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
@@ -194,7 +194,7 @@ kill_with_rst:
 		/* In window segment, it may be only reset or bare ack. */
 
 		if (th->rst) {
-			/* This is TIME_WAIT assasination, in two flavors.
+			/* This is TIME_WAIT assassination, in two flavors.
 			 * Oh well... nobody has a sufficient solution to this
 			 * protocol bug yet.
 			 */
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		 */
 		newtp->snd_cwnd = 2;
 		newtp->snd_cwnd_cnt = 0;
+		newtp->bytes_acked = 0;
 
 		newtp->frto_counter = 0;
 		newtp->frto_highmark = 0;
@@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 
 	/* RFC793 page 36: "If the connection is in any non-synchronized state ...
 	 *                  and the incoming segment acknowledges something not yet
-	 *                  sent (the segment carries an unaccaptable ACK) ...
+	 *                  sent (the segment carries an unacceptable ACK) ...
 	 *                  a reset is sent."
 	 *
 	 * Invalid ACK: reset will be sent by listening socket
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b907456..029c70d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	u16 flags;
 
 	BUG_ON(len > skb->len);
+
+ 	clear_all_retrans_hints(tp);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -599,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    for TCP options, but includes only bare TCP header.
 
    tp->rx_opt.mss_clamp is mss negotiated at connection setup.
-   It is minumum of user_mss and mss received with SYN.
+   It is minimum of user_mss and mss received with SYN.
    It also does not include TCP options.
 
    tp->pmtu_cookie is last pmtu, seen by this function.
@@ -1171,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	/* MSS for the peer's data.  Previous verions used mss_clamp
+	/* MSS for the peer's data.  Previous versions used mss_clamp
 	 * here.  I don't know if the value based on our guesses
 	 * of peer's MSS is better for the performance.  It's more correct
 	 * but may be worse for the performance because of rcv_mss
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		BUG_ON(tcp_skb_pcount(skb) != 1 ||
 		       tcp_skb_pcount(next_skb) != 1);
 
-		/* Ok.  We will be able to collapse the packet. */
+		/* changing transmit queue under us so clear hints */
+		clear_all_retrans_hints(tp);
+
+		/* Ok.	We will be able to collapse the packet. */
 		__skb_unlink(next_skb, &sk->sk_write_queue);
 
 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
 		}
 	}
 
+	clear_all_retrans_hints(tp);
+
 	if (!lost)
 		return;
 
@@ -1361,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	int err;
 
 	/* Do not sent more than we queued. 1/4 is reserved for possible
-	 * copying overhead: frgagmentation, tunneling, mangling etc.
+	 * copying overhead: fragmentation, tunneling, mangling etc.
 	 */
 	if (atomic_read(&sk->sk_wmem_alloc) >
 	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int packet_cnt = tp->lost_out;
+	int packet_cnt;
+
+	if (tp->retransmit_skb_hint) {
+		skb = tp->retransmit_skb_hint;
+		packet_cnt = tp->retransmit_cnt_hint;
+	}else{
+		skb = sk->sk_write_queue.next;
+		packet_cnt = 0;
+	}
 
 	/* First pass: retransmit lost packets. */
-	if (packet_cnt) {
-		sk_stream_for_retrans_queue(skb, sk) {
+	if (tp->lost_out) {
+		sk_stream_for_retrans_queue_from(skb, sk) {
 			__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
+			/* we could do better than to assign each time */
+			tp->retransmit_skb_hint = skb;
+			tp->retransmit_cnt_hint = packet_cnt;
+
 			/* Assume this retransmit will generate
 			 * only one packet for congestion window
 			 * calculation purposes.  This works because
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
 				return;
 
-			if (sacked&TCPCB_LOST) {
+			if (sacked & TCPCB_LOST) {
 				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
-					if (tcp_retransmit_skb(sk, skb))
+					if (tcp_retransmit_skb(sk, skb)) {
+						tp->retransmit_skb_hint = NULL;
 						return;
+					}
 					if (icsk->icsk_ca_state != TCP_CA_Loss)
 						NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
 					else
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 									  TCP_RTO_MAX);
 				}
 
-				packet_cnt -= tcp_skb_pcount(skb);
-				if (packet_cnt <= 0)
+				packet_cnt += tcp_skb_pcount(skb);
+				if (packet_cnt >= tp->lost_out)
 					break;
 			}
 		}
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	if (tcp_may_send_now(sk, tp))
 		return;
 
-	packet_cnt = 0;
+	if (tp->forward_skb_hint) {
+		skb = tp->forward_skb_hint;
+		packet_cnt = tp->forward_cnt_hint;
+	} else{
+		skb = sk->sk_write_queue.next;
+		packet_cnt = 0;
+	}
+
+	sk_stream_for_retrans_queue_from(skb, sk) {
+		tp->forward_cnt_hint = packet_cnt;
+		tp->forward_skb_hint = skb;
 
-	sk_stream_for_retrans_queue(skb, sk) {
 		/* Similar to the retransmit loop above we
 		 * can pretend that the retransmitted SKB
 		 * we send out here will be composed of one
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			continue;
 
 		/* Ok, retransmit it. */
-		if (tcp_retransmit_skb(sk, skb))
+		if (tcp_retransmit_skb(sk, skb)) {
+			tp->forward_skb_hint = NULL;
 			break;
+		}
 
 		if (skb == skb_peek(&sk->sk_write_queue))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -2058,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect);
 EXPORT_SYMBOL(tcp_make_synack);
 EXPORT_SYMBOL(tcp_simple_retransmit);
 EXPORT_SYMBOL(tcp_sync_mss);
+EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 327770b..26d7486 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 				    u32 in_flight, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	if (in_flight < tp->snd_cwnd)
+
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		tp->snd_cwnd++;
-	} else {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
 		tp->snd_cwnd_cnt++;
 		if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
-			tp->snd_cwnd++;
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
 			tp->snd_cwnd_cnt = 0;
 		}
 	}
-	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
 static u32 tcp_scalable_ssthresh(struct sock *sk)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 415ee47..e188095 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk)
  * to prevent DoS attacks. It is called when a retransmission timeout
  * or zero probe timeout occurs on orphaned socket.
  *
- * Criterium is still not confirmed experimentally and may change.
+ * Criteria is still not confirmed experimentally and may change.
  * We kill the socket, if:
  * 1. If number of orphaned sockets exceeds an administratively configured
  *    limit.
@@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk)
 			   hole detection. :-(
 
 			   It is place to make it. It is not made. I do not want
-			   to make it. It is disguisting. It does not work in any
+			   to make it. It is disgusting. It does not work in any
 			   case. Let me to cite the same draft, which requires for
 			   us to implement this:
 
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 93c5f92..4376814 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -236,8 +236,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 			/* We don't have enough RTT samples to do the Vegas
 			 * calculation, so we'll behave like Reno.
 			 */
-			if (tp->snd_cwnd > tp->snd_ssthresh)
-				tp->snd_cwnd++;
+			tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, cnt);
 		} else {
 			u32 rtt, target_cwnd, diff;
 
@@ -275,7 +274,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 			 */
 			diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
 
-			if (tp->snd_cwnd < tp->snd_ssthresh) {
+			if (tp->snd_cwnd <= tp->snd_ssthresh) {
 				/* Slow start.  */
 				if (diff > gamma) {
 					/* Going too fast. Time to slow down
@@ -295,6 +294,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 							    V_PARAM_SHIFT)+1);
 
 				}
+				tcp_slow_start(tp);
 			} else {
 				/* Congestion avoidance. */
 				u32 next_snd_cwnd;
@@ -327,37 +327,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 				else if (next_snd_cwnd < tp->snd_cwnd)
 					tp->snd_cwnd--;
 			}
-		}
 
-		/* Wipe the slate clean for the next RTT. */
-		vegas->cntRTT = 0;
-		vegas->minRTT = 0x7fffffff;
+			if (tp->snd_cwnd < 2)
+				tp->snd_cwnd = 2;
+			else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
+				tp->snd_cwnd = tp->snd_cwnd_clamp;
+		}
 	}
 
-	/* The following code is executed for every ack we receive,
-	 * except for conditions checked in should_advance_cwnd()
-	 * before the call to tcp_cong_avoid(). Mainly this means that
-	 * we only execute this code if the ack actually acked some
-	 * data.
-	 */
-
-	/* If we are in slow start, increase our cwnd in response to this ACK.
-	 * (If we are not in slow start then we are in congestion avoidance,
-	 * and adjust our congestion window only once per RTT. See the code
-	 * above.)
-	 */
-	if (tp->snd_cwnd <= tp->snd_ssthresh)
-		tp->snd_cwnd++;
-
-	/* to keep cwnd from growing without bound */
-	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
-
-	/* Make sure that we are never so timid as to reduce our cwnd below
-	 * 2 MSS.
-	 *
-	 * Going below 2 MSS would risk huge delayed ACKs from our receiver.
-	 */
-	tp->snd_cwnd = max(tp->snd_cwnd, 2U);
+	/* Wipe the slate clean for the next RTT. */
+	vegas->cntRTT = 0;
+	vegas->minRTT = 0x7fffffff;
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e0bd101..2422a5f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -761,7 +761,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
 {
-	return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	return __skb_checksum_complete(skb);
 }
 
 static __inline__ int udp_checksum_complete(struct sk_buff *skb)
@@ -1100,11 +1100,8 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
 	if (uh->check == 0) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	} else if (skb->ip_summed == CHECKSUM_HW) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
 		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
-			return 0;
-		LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n");
-		skb->ip_summed = CHECKSUM_NONE;
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
 		skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
author	Jeff Garzik <jgarzik@pobox.com>	2005-11-11 10:51:24 (GMT)
committer	Jeff Garzik <jgarzik@pobox.com>	2005-11-11 10:51:24 (GMT)
commit	3b621ee5df437d3f332a635ab6421aaa61a7dc2b (patch)
tree	c4a5236cee8eb7418770802313d36a55f1cc0b1e /net/ipv4
parent	7211bb9b64f17b23834d91fc3d0c1d78671ee9a8 (diff)
parent	5e04e7fe774794b837e1d3897e6b96ae2d06679a (diff)
download	linux-fsl-qoriq-3b621ee5df437d3f332a635ab6421aaa61a7dc2b.tar.xz