Merge remote-tracking branch 'linus/master' into merge

Conflicts: Documentation/hwmon/ina2xx arch/powerpc/Kconfig arch/powerpc/boot/dts/b4860emu.dts arch/powerpc/boot/dts/b4qds.dtsi arch/powerpc/boot/dts/fsl/b4si-post.dtsi arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi arch/powerpc/boot/dts/p1023rdb.dts arch/powerpc/boot/dts/t4240emu.dts arch/powerpc/boot/dts/t4240qds.dts arch/powerpc/configs/85xx/p1023_defconfig arch/powerpc/configs/corenet32_smp_defconfig arch/powerpc/configs/corenet64_smp_defconfig arch/powerpc/configs/mpc85xx_smp_defconfig arch/powerpc/include/asm/cputable.h arch/powerpc/include/asm/device.h arch/powerpc/include/asm/epapr_hcalls.h arch/powerpc/include/asm/kvm_host.h arch/powerpc/include/asm/mpic.h arch/powerpc/include/asm/pci.h arch/powerpc/include/asm/ppc-opcode.h arch/powerpc/include/asm/ppc_asm.h arch/powerpc/include/asm/reg_booke.h arch/powerpc/kernel/epapr_paravirt.c arch/powerpc/kernel/process.c arch/powerpc/kernel/prom.c arch/powerpc/kernel/setup-common.c arch/powerpc/kernel/setup_32.c arch/powerpc/kernel/setup_64.c arch/powerpc/kernel/smp.c arch/powerpc/kernel/swsusp_asm64.S arch/powerpc/kernel/swsusp_booke.S arch/powerpc/kvm/book3s_pr.c arch/powerpc/kvm/booke.c arch/powerpc/kvm/booke.h arch/powerpc/kvm/e500.c arch/powerpc/kvm/e500.h arch/powerpc/kvm/e500_emulate.c arch/powerpc/kvm/e500mc.c arch/powerpc/kvm/powerpc.c arch/powerpc/perf/e6500-pmu.c arch/powerpc/platforms/85xx/Kconfig arch/powerpc/platforms/85xx/Makefile arch/powerpc/platforms/85xx/b4_qds.c arch/powerpc/platforms/85xx/c293pcie.c arch/powerpc/platforms/85xx/corenet_ds.c arch/powerpc/platforms/85xx/corenet_ds.h arch/powerpc/platforms/85xx/p1023_rds.c arch/powerpc/platforms/85xx/p2041_rdb.c arch/powerpc/platforms/85xx/p3041_ds.c arch/powerpc/platforms/85xx/p4080_ds.c arch/powerpc/platforms/85xx/p5020_ds.c arch/powerpc/platforms/85xx/p5040_ds.c arch/powerpc/platforms/85xx/smp.c arch/powerpc/platforms/85xx/t4240_qds.c arch/powerpc/platforms/Kconfig arch/powerpc/sysdev/Makefile arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c arch/powerpc/sysdev/fsl_msi.c arch/powerpc/sysdev/fsl_pci.c arch/powerpc/sysdev/fsl_pci.h arch/powerpc/sysdev/fsl_soc.h arch/powerpc/sysdev/mpic.c arch/powerpc/sysdev/mpic_timer.c drivers/Kconfig drivers/clk/Kconfig drivers/clk/clk-ppc-corenet.c drivers/cpufreq/Kconfig.powerpc drivers/cpufreq/Makefile drivers/cpufreq/ppc-corenet-cpufreq.c drivers/crypto/caam/Kconfig drivers/crypto/caam/Makefile drivers/crypto/caam/ctrl.c drivers/crypto/caam/desc_constr.h drivers/crypto/caam/intern.h drivers/crypto/caam/jr.c drivers/crypto/caam/regs.h drivers/dma/fsldma.c drivers/hwmon/ina2xx.c drivers/iommu/Kconfig drivers/iommu/fsl_pamu.c drivers/iommu/fsl_pamu.h drivers/iommu/fsl_pamu_domain.c drivers/iommu/fsl_pamu_domain.h drivers/misc/Makefile drivers/mmc/card/block.c drivers/mmc/core/core.c drivers/mmc/host/sdhci-esdhc.h drivers/mmc/host/sdhci-pltfm.c drivers/mtd/nand/fsl_ifc_nand.c drivers/net/ethernet/freescale/gianfar.c drivers/net/ethernet/freescale/gianfar.h drivers/net/ethernet/freescale/gianfar_ethtool.c drivers/net/phy/at803x.c drivers/net/phy/phy_device.c drivers/net/phy/vitesse.c drivers/pci/msi.c drivers/staging/Kconfig drivers/staging/Makefile drivers/uio/Kconfig drivers/uio/Makefile drivers/uio/uio.c drivers/usb/host/ehci-fsl.c drivers/vfio/Kconfig drivers/vfio/Makefile include/crypto/algapi.h include/linux/iommu.h include/linux/mmc/sdhci.h include/linux/msi.h include/linux/netdev_features.h include/linux/phy.h include/linux/skbuff.h include/net/ip.h include/uapi/linux/vfio.h net/core/ethtool.c net/ipv4/route.c net/ipv6/route.c
author: Scott Wood <scottwood@freescale.com> 2013-12-14 01:15:24 (GMT)
committer: Scott Wood <scottwood@freescale.com> 2013-12-14 01:15:24 (GMT)
commit: b7c81aa3ab2ac2c140e278b6d0e9a0b95112cf0b (patch)
tree: 87828aaf5f82c7042bfc0307bc4ac499e10f93fb /net/netfilter
parent: 22c782a4b14773fab7eab3c1db54ad7ad077e9b8 (diff)
parent: 78fd82238d0e5716578c326404184a27ba67fd6e (diff)
download: linux-fsl-qoriq-b7c81aa3ab2ac2c140e278b6d0e9a0b95112cf0b.tar.xz
99 files changed, 12129 insertions, 3534 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56d22ca..c3398cd 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -408,21 +408,62 @@ config NF_NAT_TFTP
 	depends on NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_TFTP
 
+config NETFILTER_SYNPROXY
+	tristate
+
 endif # NF_CONNTRACK
 
-# transparent proxy support
-config NETFILTER_TPROXY
-	tristate "Transparent proxying support"
-	depends on IP_NF_MANGLE
-	depends on NETFILTER_ADVANCED
-	help
-	  This option enables transparent proxying support, that is,
-	  support for handling non-locally bound IPv4 TCP and UDP sockets.
-	  For it to work you will have to configure certain iptables rules
-	  and use policy routing. For more information on how to set it up
-	  see Documentation/networking/tproxy.txt.
+config NF_TABLES
+	depends on NETFILTER_NETLINK
+	tristate "Netfilter nf_tables support"
 
-	  To compile it as a module, choose M here.  If unsure, say N.
+config NFT_EXTHDR
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables IPv6 exthdr module"
+
+config NFT_META
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables meta module"
+
+config NFT_CT
+	depends on NF_TABLES
+	depends on NF_CONNTRACK
+	tristate "Netfilter nf_tables conntrack module"
+
+config NFT_RBTREE
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables rbtree set module"
+
+config NFT_HASH
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables hash set module"
+
+config NFT_COUNTER
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables counter module"
+
+config NFT_LOG
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables log module"
+
+config NFT_LIMIT
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables limit module"
+
+config NFT_NAT
+	depends on NF_TABLES
+	depends on NF_CONNTRACK
+	depends on NF_NAT
+	tristate "Netfilter nf_tables nat module"
+
+config NFT_COMPAT
+	depends on NF_TABLES
+	depends on NETFILTER_XTABLES
+	tristate "Netfilter x_tables over nf_tables module"
+	help
+	  This is required if you intend to use any of existing
+	  x_tables match/target extensions over the nf_tables
+	  framework.
 
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
@@ -720,10 +761,10 @@ config NETFILTER_XT_TARGET_TEE
 	this clone be rerouted to another nexthop.
 
 config NETFILTER_XT_TARGET_TPROXY
-	tristate '"TPROXY" target support'
-	depends on NETFILTER_TPROXY
+	tristate '"TPROXY" target transparent proxying support'
 	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
+	depends on IP_NF_MANGLE
 	select NF_DEFRAG_IPV4
 	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
 	help
@@ -731,6 +772,9 @@ config NETFILTER_XT_TARGET_TPROXY
 	  REDIRECT.  It can only be used in the mangle table and is useful
 	  to redirect traffic to a transparent proxy.  It does _not_ depend
 	  on Netfilter connection tracking and NAT, unlike REDIRECT.
+	  For it to work you will have to configure certain iptables rules
+	  and use policy routing. For more information on how to set it up
+	  see Documentation/networking/tproxy.txt.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
@@ -865,7 +909,7 @@ config NETFILTER_XT_MATCH_CONNLABEL
 	  connection simultaneously.
 
 config NETFILTER_XT_MATCH_CONNLIMIT
-	tristate '"connlimit" match support"'
+	tristate '"connlimit" match support'
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	---help---
@@ -1180,10 +1224,10 @@ config NETFILTER_XT_MATCH_SCTP
 
 config NETFILTER_XT_MATCH_SOCKET
 	tristate '"socket" match support'
-	depends on NETFILTER_TPROXY
 	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	depends on !NF_CONNTRACK || NF_CONNTRACK
+	depends on (IPV6 || IPV6=n)
 	select NF_DEFRAG_IPV4
 	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
 	help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a1abf87..394483b 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,6 @@
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
 
-nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
@@ -61,8 +61,26 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
 obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
 obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
 
-# transparent proxy support
-obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
+# SYNPROXY
+obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
+
+# nf_tables
+nf_tables-objs += nf_tables_core.o nf_tables_api.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+
+obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
+obj-$(CONFIG_NFT_COMPAT)	+= nft_compat.o
+obj-$(CONFIG_NFT_EXTHDR)	+= nft_exthdr.o
+obj-$(CONFIG_NFT_META)		+= nft_meta.o
+obj-$(CONFIG_NFT_CT)		+= nft_ct.o
+obj-$(CONFIG_NFT_LIMIT)		+= nft_limit.o
+obj-$(CONFIG_NFT_NAT)		+= nft_nat.o
+#nf_tables-objs			+= nft_meta_target.o
+obj-$(CONFIG_NFT_RBTREE)	+= nft_rbtree.o
+obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
+obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
+obj-$(CONFIG_NFT_LOG)		+= nft_log.o
 
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 857ca9f..1fbab0c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head,
 		/* Optimization: we don't need to hold module
 		   reference here, since function can't sleep. --RR */
 repeat:
-		verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn);
+		verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn);
 		if (verdict != NF_ACCEPT) {
 #ifdef CONFIG_NETFILTER_DEBUG
 			if (unlikely((verdict & NF_VERDICT_MASK)
@@ -234,12 +234,13 @@ EXPORT_SYMBOL(skb_make_writable);
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
    manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
+void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
+		__rcu __read_mostly;
 EXPORT_SYMBOL(ip_ct_attach);
 
-void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
+void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
 {
-	void (*attach)(struct sk_buff *, struct sk_buff *);
+	void (*attach)(struct sk_buff *, const struct sk_buff *);
 
 	if (skb->nfct) {
 		rcu_read_lock();
@@ -304,17 +305,26 @@ static struct pernet_operations netfilter_net_ops = {
 	.exit = netfilter_net_exit,
 };
 
-void __init netfilter_init(void)
+int __init netfilter_init(void)
 {
-	int i, h;
+	int i, h, ret;
+
 	for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
 			INIT_LIST_HEAD(&nf_hooks[i][h]);
 	}
 
-	if (register_pernet_subsys(&netfilter_net_ops) < 0)
-		panic("cannot create netfilter proc entry");
+	ret = register_pernet_subsys(&netfilter_net_ops);
+	if (ret < 0)
+		goto err;
+
+	ret = netfilter_log_init();
+	if (ret < 0)
+		goto err_pernet;
 
-	if (netfilter_log_init() < 0)
-		panic("cannot initialize nf_log");
+	return 0;
+err_pernet:
+	unregister_pernet_subsys(&netfilter_net_ops);
+err:
+	return ret;
 }
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index ba36c28..a2d6263 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -1,7 +1,7 @@
 menuconfig IP_SET
 	tristate "IP set support"
 	depends on INET && NETFILTER
-	depends on NETFILTER_NETLINK
+	select NETFILTER_NETLINK
 	help
 	  This option adds IP set support to the kernel.
 	  In order to define and use the sets, you need the userspace utility
@@ -90,6 +90,15 @@ config IP_SET_HASH_IPPORTNET
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_SET_HASH_NETPORTNET
+	tristate "hash:net,port,net set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:net,port,net set type support, by which
+	  one can store two IPv4/IPv6 subnets, and a protocol/port in a set.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP_SET_HASH_NET
 	tristate "hash:net set support"
 	depends on IP_SET
@@ -99,6 +108,15 @@ config IP_SET_HASH_NET
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_SET_HASH_NETNET
+	tristate "hash:net,net set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:net,net  set type support, by which
+	  one can store IPv4/IPv6 network address/prefix pairs in a set.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP_SET_HASH_NETPORT
 	tristate "hash:net,port set support"
 	depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 6e965ec..44b2d38 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -20,6 +20,8 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
 obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
 obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
 obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o
+obj-$(CONFIG_IP_SET_HASH_NETNET) += ip_set_hash_netnet.o
+obj-$(CONFIG_IP_SET_HASH_NETPORTNET) += ip_set_hash_netportnet.o
 
 # list types
 obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 2524337..f2c7d83 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -8,38 +8,32 @@
 #ifndef __IP_SET_BITMAP_IP_GEN_H
 #define __IP_SET_BITMAP_IP_GEN_H
 
-#define CONCAT(a, b)		a##b
-#define TOKEN(a,b)		CONCAT(a, b)
-
-#define mtype_do_test		TOKEN(MTYPE, _do_test)
-#define mtype_gc_test		TOKEN(MTYPE, _gc_test)
-#define mtype_is_filled		TOKEN(MTYPE, _is_filled)
-#define mtype_do_add		TOKEN(MTYPE, _do_add)
-#define mtype_do_del		TOKEN(MTYPE, _do_del)
-#define mtype_do_list		TOKEN(MTYPE, _do_list)
-#define mtype_do_head		TOKEN(MTYPE, _do_head)
-#define mtype_adt_elem		TOKEN(MTYPE, _adt_elem)
-#define mtype_add_timeout	TOKEN(MTYPE, _add_timeout)
-#define mtype_gc_init		TOKEN(MTYPE, _gc_init)
-#define mtype_kadt		TOKEN(MTYPE, _kadt)
-#define mtype_uadt		TOKEN(MTYPE, _uadt)
-#define mtype_destroy		TOKEN(MTYPE, _destroy)
-#define mtype_flush		TOKEN(MTYPE, _flush)
-#define mtype_head		TOKEN(MTYPE, _head)
-#define mtype_same_set		TOKEN(MTYPE, _same_set)
-#define mtype_elem		TOKEN(MTYPE, _elem)
-#define mtype_test		TOKEN(MTYPE, _test)
-#define mtype_add		TOKEN(MTYPE, _add)
-#define mtype_del		TOKEN(MTYPE, _del)
-#define mtype_list		TOKEN(MTYPE, _list)
-#define mtype_gc		TOKEN(MTYPE, _gc)
+#define mtype_do_test		IPSET_TOKEN(MTYPE, _do_test)
+#define mtype_gc_test		IPSET_TOKEN(MTYPE, _gc_test)
+#define mtype_is_filled		IPSET_TOKEN(MTYPE, _is_filled)
+#define mtype_do_add		IPSET_TOKEN(MTYPE, _do_add)
+#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
+#define mtype_do_del		IPSET_TOKEN(MTYPE, _do_del)
+#define mtype_do_list		IPSET_TOKEN(MTYPE, _do_list)
+#define mtype_do_head		IPSET_TOKEN(MTYPE, _do_head)
+#define mtype_adt_elem		IPSET_TOKEN(MTYPE, _adt_elem)
+#define mtype_add_timeout	IPSET_TOKEN(MTYPE, _add_timeout)
+#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)
+#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
+#define mtype_head		IPSET_TOKEN(MTYPE, _head)
+#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_test		IPSET_TOKEN(MTYPE, _test)
+#define mtype_add		IPSET_TOKEN(MTYPE, _add)
+#define mtype_del		IPSET_TOKEN(MTYPE, _del)
+#define mtype_list		IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
 #define mtype			MTYPE
 
-#define ext_timeout(e, m)	\
-	(unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
-#define ext_counter(e, m)	\
-	(struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER])
-#define get_ext(map, id)	((map)->extensions + (map)->dsize * (id))
+#define get_ext(set, map, id)	((map)->extensions + (set)->dsize * (id))
 
 static void
 mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -49,11 +43,22 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	init_timer(&map->gc);
 	map->gc.data = (unsigned long) set;
 	map->gc.function = gc;
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
 static void
+mtype_ext_cleanup(struct ip_set *set)
+{
+	struct mtype *map = set->data;
+	u32 id;
+
+	for (id = 0; id < map->elements; id++)
+		if (test_bit(id, map->members))
+			ip_set_ext_destroy(set, get_ext(set, map, id));
+}
+
+static void
 mtype_destroy(struct ip_set *set)
 {
 	struct mtype *map = set->data;
@@ -62,8 +67,11 @@ mtype_destroy(struct ip_set *set)
 		del_timer_sync(&map->gc);
 
 	ip_set_free(map->members);
-	if (map->dsize)
+	if (set->dsize) {
+		if (set->extensions & IPSET_EXT_DESTROY)
+			mtype_ext_cleanup(set);
 		ip_set_free(map->extensions);
+	}
 	kfree(map);
 
 	set->data = NULL;
@@ -74,6 +82,8 @@ mtype_flush(struct ip_set *set)
 {
 	struct mtype *map = set->data;
 
+	if (set->extensions & IPSET_EXT_DESTROY)
+		mtype_ext_cleanup(set);
 	memset(map->members, 0, map->memsize);
 }
 
@@ -91,12 +101,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
 			  htonl(sizeof(*map) +
 				map->memsize +
-				map->dsize * map->elements)) ||
-	    (SET_WITH_TIMEOUT(set) &&
-	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
-	    (SET_WITH_COUNTER(set) &&
-	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
-			   htonl(IPSET_FLAG_WITH_COUNTERS))))
+				set->dsize * map->elements)))
+		goto nla_put_failure;
+	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
 	ipset_nest_end(skb, nested);
 
@@ -111,16 +118,16 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	void *x = get_ext(map, e->id);
-	int ret = mtype_do_test(e, map);
+	void *x = get_ext(set, map, e->id);
+	int ret = mtype_do_test(e, map, set->dsize);
 
 	if (ret <= 0)
 		return ret;
 	if (SET_WITH_TIMEOUT(set) &&
-	    ip_set_timeout_expired(ext_timeout(x, map)))
+	    ip_set_timeout_expired(ext_timeout(x, set)))
 		return 0;
 	if (SET_WITH_COUNTER(set))
-		ip_set_update_counter(ext_counter(x, map), ext, mext, flags);
+		ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
 	return 1;
 }
 
@@ -130,26 +137,30 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	void *x = get_ext(map, e->id);
-	int ret = mtype_do_add(e, map, flags);
+	void *x = get_ext(set, map, e->id);
+	int ret = mtype_do_add(e, map, flags, set->dsize);
 
 	if (ret == IPSET_ADD_FAILED) {
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(x, map)))
+		    ip_set_timeout_expired(ext_timeout(x, set)))
 			ret = 0;
 		else if (!(flags & IPSET_FLAG_EXIST))
 			return -IPSET_ERR_EXIST;
+		/* Element is re-added, cleanup extensions */
+		ip_set_ext_destroy(set, x);
 	}
 
 	if (SET_WITH_TIMEOUT(set))
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
-		mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret);
+		mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret);
 #else
-		ip_set_timeout_set(ext_timeout(x, map), ext->timeout);
+		ip_set_timeout_set(ext_timeout(x, set), ext->timeout);
 #endif
 
 	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(x, map), ext);
+		ip_set_init_counter(ext_counter(x, set), ext);
+	if (SET_WITH_COMMENT(set))
+		ip_set_init_comment(ext_comment(x, set), ext);
 	return 0;
 }
 
@@ -159,16 +170,27 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	const void *x = get_ext(map, e->id);
+	void *x = get_ext(set, map, e->id);
 
-	if (mtype_do_del(e, map) ||
-	    (SET_WITH_TIMEOUT(set) &&
-	     ip_set_timeout_expired(ext_timeout(x, map))))
+	if (mtype_do_del(e, map))
+		return -IPSET_ERR_EXIST;
+
+	ip_set_ext_destroy(set, x);
+	if (SET_WITH_TIMEOUT(set) &&
+	    ip_set_timeout_expired(ext_timeout(x, set)))
 		return -IPSET_ERR_EXIST;
 
 	return 0;
 }
 
+#ifndef IP_SET_BITMAP_STORED_TIMEOUT
+static inline bool
+mtype_is_filled(const struct mtype_elem *x)
+{
+	return true;
+}
+#endif
+
 static int
 mtype_list(const struct ip_set *set,
 	   struct sk_buff *skb, struct netlink_callback *cb)
@@ -176,20 +198,21 @@ mtype_list(const struct ip_set *set,
 	struct mtype *map = set->data;
 	struct nlattr *adt, *nested;
 	void *x;
-	u32 id, first = cb->args[2];
+	u32 id, first = cb->args[IPSET_CB_ARG0];
 
 	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
 	if (!adt)
 		return -EMSGSIZE;
-	for (; cb->args[2] < map->elements; cb->args[2]++) {
-		id = cb->args[2];
-		x = get_ext(map, id);
+	for (; cb->args[IPSET_CB_ARG0] < map->elements;
+	     cb->args[IPSET_CB_ARG0]++) {
+		id = cb->args[IPSET_CB_ARG0];
+		x = get_ext(set, map, id);
 		if (!test_bit(id, map->members) ||
 		    (SET_WITH_TIMEOUT(set) &&
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
 		     mtype_is_filled((const struct mtype_elem *) x) &&
 #endif
-		     ip_set_timeout_expired(ext_timeout(x, map))))
+		     ip_set_timeout_expired(ext_timeout(x, set))))
 			continue;
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested) {
@@ -199,40 +222,27 @@ mtype_list(const struct ip_set *set,
 			} else
 				goto nla_put_failure;
 		}
-		if (mtype_do_list(skb, map, id))
+		if (mtype_do_list(skb, map, id, set->dsize))
 			goto nla_put_failure;
-		if (SET_WITH_TIMEOUT(set)) {
-#ifdef IP_SET_BITMAP_STORED_TIMEOUT
-			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-					  htonl(ip_set_timeout_stored(map, id,
-							ext_timeout(x, map)))))
-				goto nla_put_failure;
-#else
-			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-					  htonl(ip_set_timeout_get(
-							ext_timeout(x, map)))))
-				goto nla_put_failure;
-#endif
-		}
-		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(x, map)))
+		if (ip_set_put_extensions(skb, set, x,
+		    mtype_is_filled((const struct mtype_elem *) x)))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
 	ipset_nest_end(skb, adt);
 
 	/* Set listing finished */
-	cb->args[2] = 0;
+	cb->args[IPSET_CB_ARG0] = 0;
 
 	return 0;
 
 nla_put_failure:
 	nla_nest_cancel(skb, nested);
-	ipset_nest_end(skb, adt);
 	if (unlikely(id == first)) {
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 		return -EMSGSIZE;
 	}
+	ipset_nest_end(skb, adt);
 	return 0;
 }
 
@@ -241,21 +251,23 @@ mtype_gc(unsigned long ul_set)
 {
 	struct ip_set *set = (struct ip_set *) ul_set;
 	struct mtype *map = set->data;
-	const void *x;
+	void *x;
 	u32 id;
 
 	/* We run parallel with other readers (test element)
 	 * but adding/deleting new entries is locked out */
 	read_lock_bh(&set->lock);
 	for (id = 0; id < map->elements; id++)
-		if (mtype_gc_test(id, map)) {
-			x = get_ext(map, id);
-			if (ip_set_timeout_expired(ext_timeout(x, map)))
+		if (mtype_gc_test(id, map, set->dsize)) {
+			x = get_ext(set, map, id);
+			if (ip_set_timeout_expired(ext_timeout(x, set))) {
 				clear_bit(id, map->members);
+				ip_set_ext_destroy(set, x);
+			}
 		}
 	read_unlock_bh(&set->lock);
 
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index f1a8128..6f1f9f4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -25,12 +25,13 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counter support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1	   Counter support added */
+#define IPSET_TYPE_REV_MAX	2	/* Comment support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip");
 
 #define MTYPE		bitmap_ip
@@ -44,10 +45,7 @@ struct bitmap_ip {
 	u32 elements;		/* number of max elements in the set */
 	u32 hosts;		/* number of hosts in a subnet */
 	size_t memsize;		/* members size */
-	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
 	u8 netmask;		/* subnet netmask */
-	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
 };
 
@@ -65,20 +63,21 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
 /* Common functions */
 
 static inline int
-bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
+bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
+		  struct bitmap_ip *map, size_t dsize)
 {
 	return !!test_bit(e->id, map->members);
 }
 
 static inline int
-bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map)
+bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
 {
 	return !!test_bit(id, map->members);
 }
 
 static inline int
 bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
-		 u32 flags)
+		 u32 flags, size_t dsize)
 {
 	return !!test_and_set_bit(e->id, map->members);
 }
@@ -90,7 +89,8 @@ bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
 }
 
 static inline int
-bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id)
+bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
+		  size_t dsize)
 {
 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			htonl(map->first_ip + id * map->hosts));
@@ -113,7 +113,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct bitmap_ip *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_ip_adt_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
 	ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
@@ -131,9 +131,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 {
 	struct bitmap_ip *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	u32 ip, ip_to;
+	u32 ip = 0, ip_to = 0;
 	struct bitmap_ip_adt_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret = 0;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -200,7 +200,7 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
 	return x->first_ip == y->first_ip &&
 	       x->last_ip == y->last_ip &&
 	       x->netmask == y->netmask &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -209,25 +209,6 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
 struct bitmap_ip_elem {
 };
 
-/* Timeout variant */
-
-struct bitmap_ipt_elem {
-	unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_ipc_elem {
-	struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_ipct_elem {
-	unsigned long timeout;
-	struct ip_set_counter counter;
-};
-
 #include "ip_set_bitmap_gen.h"
 
 /* Create bitmap:ip type of sets */
@@ -240,8 +221,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (map->dsize) {
-		map->extensions = ip_set_alloc(map->dsize * elements);
+	if (set->dsize) {
+		map->extensions = ip_set_alloc(set->dsize * elements);
 		if (!map->extensions) {
 			kfree(map->members);
 			return false;
@@ -252,7 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->elements = elements;
 	map->hosts = hosts;
 	map->netmask = netmask;
-	map->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	set->data = map;
 	set->family = NFPROTO_IPV4;
@@ -261,10 +242,11 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 }
 
 static int
-bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+		 u32 flags)
 {
 	struct bitmap_ip *map;
-	u32 first_ip, last_ip, hosts, cadt_flags = 0;
+	u32 first_ip = 0, last_ip = 0, hosts;
 	u64 elements;
 	u8 netmask = 32;
 	int ret;
@@ -336,61 +318,15 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ip;
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map->dsize = sizeof(struct bitmap_ipct_elem);
-			map->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct bitmap_ipct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
-				offsetof(struct bitmap_ipct_elem, counter);
-
-			if (!init_map_ip(set, map, first_ip, last_ip,
-					 elements, hosts, netmask)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-
-			map->timeout = ip_set_timeout_uget(
-				tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-
-			bitmap_ip_gc_init(set, bitmap_ip_gc);
-		} else {
-			map->dsize = sizeof(struct bitmap_ipc_elem);
-			map->offset[IPSET_OFFSET_COUNTER] =
-				offsetof(struct bitmap_ipc_elem, counter);
-
-			if (!init_map_ip(set, map, first_ip, last_ip,
-					 elements, hosts, netmask)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map->dsize = sizeof(struct bitmap_ipt_elem);
-		map->offset[IPSET_OFFSET_TIMEOUT] =
-			offsetof(struct bitmap_ipt_elem, timeout);
-
-		if (!init_map_ip(set, map, first_ip, last_ip,
-				 elements, hosts, netmask)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-
-		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
-
+	set->dsize = ip_set_elem_len(set, tb, 0);
+	if (!init_map_ip(set, map, first_ip, last_ip,
+			 elements, hosts, netmask)) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		bitmap_ip_gc_init(set, bitmap_ip_gc);
-	} else {
-		map->dsize = 0;
-		if (!init_map_ip(set, map, first_ip, last_ip,
-				 elements, hosts, netmask)) {
-			kfree(map);
-			return -ENOMEM;
-		}
 	}
 	return 0;
 }
@@ -401,8 +337,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_IPV4,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= bitmap_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
@@ -420,6 +356,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 3b30e0b..740eabe 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -25,12 +25,13 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counter support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1	   Counter support added */
+#define IPSET_TYPE_REV_MAX	2	/* Comment support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip,mac");
 
 #define MTYPE		bitmap_ipmac
@@ -48,11 +49,8 @@ struct bitmap_ipmac {
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
-	u32 timeout;		/* timeout value */
-	struct timer_list gc;	/* garbage collector */
 	size_t memsize;		/* members size */
-	size_t dsize;		/* size of element */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	struct timer_list gc;	/* garbage collector */
 };
 
 /* ADT structure for generic function args */
@@ -82,13 +80,13 @@ get_elem(void *extensions, u16 id, size_t dsize)
 
 static inline int
 bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
-		     const struct bitmap_ipmac *map)
+		     const struct bitmap_ipmac *map, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem;
 
 	if (!test_bit(e->id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, e->id, map->dsize);
+	elem = get_elem(map->extensions, e->id, dsize);
 	if (elem->filled == MAC_FILLED)
 		return e->ether == NULL ||
 		       ether_addr_equal(e->ether, elem->ether);
@@ -97,13 +95,13 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
 }
 
 static inline int
-bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map)
+bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem;
 
 	if (!test_bit(id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, id, map->dsize);
+	elem = get_elem(map->extensions, id, dsize);
 	/* Timer not started for the incomplete elements */
 	return elem->filled == MAC_FILLED;
 }
@@ -117,13 +115,13 @@ bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
 static inline int
 bitmap_ipmac_add_timeout(unsigned long *timeout,
 			 const struct bitmap_ipmac_adt_elem *e,
-			 const struct ip_set_ext *ext,
+			 const struct ip_set_ext *ext, struct ip_set *set,
 			 struct bitmap_ipmac *map, int mode)
 {
 	u32 t = ext->timeout;
 
 	if (mode == IPSET_ADD_START_STORED_TIMEOUT) {
-		if (t == map->timeout)
+		if (t == set->timeout)
 			/* Timeout was not specified, get stored one */
 			t = *timeout;
 		ip_set_timeout_set(timeout, t);
@@ -142,11 +140,11 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
 
 static inline int
 bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
-		    struct bitmap_ipmac *map, u32 flags)
+		    struct bitmap_ipmac *map, u32 flags, size_t dsize)
 {
 	struct bitmap_ipmac_elem *elem;
 
-	elem = get_elem(map->extensions, e->id, map->dsize);
+	elem = get_elem(map->extensions, e->id, dsize);
 	if (test_and_set_bit(e->id, map->members)) {
 		if (elem->filled == MAC_FILLED) {
 			if (e->ether && (flags & IPSET_FLAG_EXIST))
@@ -178,22 +176,12 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
 	return !test_and_clear_bit(e->id, map->members);
 }
 
-static inline unsigned long
-ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout)
-{
-	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, map->dsize);
-
-	return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) :
-					    *timeout;
-}
-
 static inline int
 bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
-		     u32 id)
+		     u32 id, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, map->dsize);
+		get_elem(map->extensions, id, dsize);
 
 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			       htonl(map->first_ip + id)) ||
@@ -216,7 +204,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_ipmac_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
 	/* MAC can be src only */
@@ -245,8 +233,8 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_ipmac_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
-	u32 ip;
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip = 0;
 	int ret = 0;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -285,43 +273,12 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
 
 	return x->first_ip == y->first_ip &&
 	       x->last_ip == y->last_ip &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
 /* Plain variant */
 
-/* Timeout variant */
-
-struct bitmap_ipmact_elem {
-	struct {
-		unsigned char ether[ETH_ALEN];
-		unsigned char filled;
-	} __attribute__ ((aligned));
-	unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_ipmacc_elem {
-	struct {
-		unsigned char ether[ETH_ALEN];
-		unsigned char filled;
-	} __attribute__ ((aligned));
-	struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_ipmacct_elem {
-	struct {
-		unsigned char ether[ETH_ALEN];
-		unsigned char filled;
-	} __attribute__ ((aligned));
-	unsigned long timeout;
-	struct ip_set_counter counter;
-};
-
 #include "ip_set_bitmap_gen.h"
 
 /* Create bitmap:ip,mac type of sets */
@@ -330,11 +287,11 @@ static bool
 init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	       u32 first_ip, u32 last_ip, u32 elements)
 {
-	map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);
+	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (map->dsize) {
-		map->extensions = ip_set_alloc(map->dsize * elements);
+	if (set->dsize) {
+		map->extensions = ip_set_alloc(set->dsize * elements);
 		if (!map->extensions) {
 			kfree(map->members);
 			return false;
@@ -343,7 +300,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
-	map->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	set->data = map;
 	set->family = NFPROTO_IPV4;
@@ -352,10 +309,10 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 }
 
 static int
-bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
+bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		    u32 flags)
 {
-	u32 first_ip, last_ip, cadt_flags = 0;
+	u32 first_ip = 0, last_ip = 0;
 	u64 elements;
 	struct bitmap_ipmac *map;
 	int ret;
@@ -399,57 +356,15 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ipmac;
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map->dsize = sizeof(struct bitmap_ipmacct_elem);
-			map->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct bitmap_ipmacct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
-				offsetof(struct bitmap_ipmacct_elem, counter);
-
-			if (!init_map_ipmac(set, map, first_ip, last_ip,
-					    elements)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-			map->timeout = ip_set_timeout_uget(
-				tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
-		} else {
-			map->dsize = sizeof(struct bitmap_ipmacc_elem);
-			map->offset[IPSET_OFFSET_COUNTER] =
-				offsetof(struct bitmap_ipmacc_elem, counter);
-
-			if (!init_map_ipmac(set, map, first_ip, last_ip,
-					    elements)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map->dsize = sizeof(struct bitmap_ipmact_elem);
-		map->offset[IPSET_OFFSET_TIMEOUT] =
-			offsetof(struct bitmap_ipmact_elem, timeout);
-
-		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
+	set->dsize = ip_set_elem_len(set, tb,
+				     sizeof(struct bitmap_ipmac_elem));
+	if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
-	} else {
-		map->dsize = sizeof(struct bitmap_ipmac_elem);
-
-		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-		set->variant = &bitmap_ipmac;
 	}
 	return 0;
 }
@@ -460,8 +375,8 @@ static struct ip_set_type bitmap_ipmac_type = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_MAC,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_IPV4,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= bitmap_ipmac_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
@@ -478,6 +393,7 @@ static struct ip_set_type bitmap_ipmac_type = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 8207d1f..cf99676 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -20,12 +20,13 @@
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 #include <linux/netfilter/ipset/ip_set_getport.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counter support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1	   Counter support added */
+#define IPSET_TYPE_REV_MAX	2	/* Comment support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:port");
 
 #define MTYPE		bitmap_port
@@ -38,9 +39,6 @@ struct bitmap_port {
 	u16 last_port;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
-	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
-	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
 };
 
@@ -59,20 +57,20 @@ port_to_id(const struct bitmap_port *m, u16 port)
 
 static inline int
 bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
-		    const struct bitmap_port *map)
+		    const struct bitmap_port *map, size_t dsize)
 {
 	return !!test_bit(e->id, map->members);
 }
 
 static inline int
-bitmap_port_gc_test(u16 id, const struct bitmap_port *map)
+bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
 {
 	return !!test_bit(id, map->members);
 }
 
 static inline int
 bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
-		   struct bitmap_port *map, u32 flags)
+		   struct bitmap_port *map, u32 flags, size_t dsize)
 {
 	return !!test_and_set_bit(e->id, map->members);
 }
@@ -85,7 +83,8 @@ bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
 }
 
 static inline int
-bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id)
+bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
+		    size_t dsize)
 {
 	return nla_put_net16(skb, IPSET_ATTR_PORT,
 			     htons(map->first_port + id));
@@ -106,7 +105,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct bitmap_port *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_port_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	__be16 __port;
 	u16 port = 0;
 
@@ -131,7 +130,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct bitmap_port *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_port_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port;	/* wraparound */
 	u16 port_to;
 	int ret = 0;
@@ -191,7 +190,7 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
 
 	return x->first_port == y->first_port &&
 	       x->last_port == y->last_port &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -200,25 +199,6 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
 struct bitmap_port_elem {
 };
 
-/* Timeout variant */
-
-struct bitmap_portt_elem {
-	unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_portc_elem {
-	struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_portct_elem {
-	unsigned long timeout;
-	struct ip_set_counter counter;
-};
-
 #include "ip_set_bitmap_gen.h"
 
 /* Create bitmap:ip type of sets */
@@ -230,8 +210,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (map->dsize) {
-		map->extensions = ip_set_alloc(map->dsize * map->elements);
+	if (set->dsize) {
+		map->extensions = ip_set_alloc(set->dsize * map->elements);
 		if (!map->extensions) {
 			kfree(map->members);
 			return false;
@@ -239,7 +219,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	}
 	map->first_port = first_port;
 	map->last_port = last_port;
-	map->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	set->data = map;
 	set->family = NFPROTO_UNSPEC;
@@ -248,11 +228,11 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 }
 
 static int
-bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+		   u32 flags)
 {
 	struct bitmap_port *map;
 	u16 first_port, last_port;
-	u32 cadt_flags = 0;
 
 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -274,55 +254,16 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		return -ENOMEM;
 
 	map->elements = last_port - first_port + 1;
-	map->memsize = map->elements * sizeof(unsigned long);
+	map->memsize = bitmap_bytes(0, map->elements);
 	set->variant = &bitmap_port;
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map->dsize = sizeof(struct bitmap_portct_elem);
-			map->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct bitmap_portct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
-				offsetof(struct bitmap_portct_elem, counter);
-			if (!init_map_port(set, map, first_port, last_port)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-
-			map->timeout =
-				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			bitmap_port_gc_init(set, bitmap_port_gc);
-		} else {
-			map->dsize = sizeof(struct bitmap_portc_elem);
-			map->offset[IPSET_OFFSET_COUNTER] =
-				offsetof(struct bitmap_portc_elem, counter);
-			if (!init_map_port(set, map, first_port, last_port)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map->dsize = sizeof(struct bitmap_portt_elem);
-		map->offset[IPSET_OFFSET_TIMEOUT] =
-			offsetof(struct bitmap_portt_elem, timeout);
-		if (!init_map_port(set, map, first_port, last_port)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-
-		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
+	set->dsize = ip_set_elem_len(set, tb, 0);
+	if (!init_map_port(set, map, first_port, last_port)) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		bitmap_port_gc_init(set, bitmap_port_gc);
-	} else {
-		map->dsize = 0;
-		if (!init_map_port(set, map, first_port, last_port)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-
 	}
 	return 0;
 }
@@ -333,8 +274,8 @@ static struct ip_set_type bitmap_port_type = {
 	.features	= IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= bitmap_port_create,
 	.create_policy	= {
 		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
@@ -349,6 +290,7 @@ static struct ip_set_type bitmap_port_type = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index f771390..bac7e01 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -17,6 +17,8 @@
 #include <linux/spinlock.h>
 #include <linux/rculist.h>
 #include <net/netlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
@@ -27,8 +29,17 @@ static LIST_HEAD(ip_set_type_list);		/* all registered set types */
 static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
 static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */
 
-static struct ip_set * __rcu *ip_set_list;	/* all individual sets */
-static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
+struct ip_set_net {
+	struct ip_set * __rcu *ip_set_list;	/* all individual sets */
+	ip_set_id_t	ip_set_max;	/* max number of sets */
+	int		is_deleted;	/* deleted by ip_set_net_exit */
+};
+static int ip_set_net_id __read_mostly;
+
+static inline struct ip_set_net *ip_set_pernet(struct net *net)
+{
+	return net_generic(net, ip_set_net_id);
+}
 
 #define IP_SET_INC	64
 #define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)
@@ -45,8 +56,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
 /* When the nfnl mutex is held: */
 #define nfnl_dereference(p)		\
 	rcu_dereference_protected(p, 1)
-#define nfnl_set(id)			\
-	nfnl_dereference(ip_set_list)[id]
+#define nfnl_set(inst, id)			\
+	nfnl_dereference((inst)->ip_set_list)[id]
 
 /*
  * The set types are implemented in modules and registered set types
@@ -315,6 +326,60 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 
+typedef void (*destroyer)(void *);
+/* ipset data extension types, in size order */
+
+const struct ip_set_ext_type ip_set_extensions[] = {
+	[IPSET_EXT_ID_COUNTER] = {
+		.type	= IPSET_EXT_COUNTER,
+		.flag	= IPSET_FLAG_WITH_COUNTERS,
+		.len	= sizeof(struct ip_set_counter),
+		.align	= __alignof__(struct ip_set_counter),
+	},
+	[IPSET_EXT_ID_TIMEOUT] = {
+		.type	= IPSET_EXT_TIMEOUT,
+		.len	= sizeof(unsigned long),
+		.align	= __alignof__(unsigned long),
+	},
+	[IPSET_EXT_ID_COMMENT] = {
+		.type	 = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
+		.flag	 = IPSET_FLAG_WITH_COMMENT,
+		.len	 = sizeof(struct ip_set_comment),
+		.align	 = __alignof__(struct ip_set_comment),
+		.destroy = (destroyer) ip_set_comment_free,
+	},
+};
+EXPORT_SYMBOL_GPL(ip_set_extensions);
+
+static inline bool
+add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
+{
+	return ip_set_extensions[id].flag ?
+		(flags & ip_set_extensions[id].flag) :
+		!!tb[IPSET_ATTR_TIMEOUT];
+}
+
+size_t
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+{
+	enum ip_set_ext_id id;
+	size_t offset = 0;
+	u32 cadt_flags = 0;
+
+	if (tb[IPSET_ATTR_CADT_FLAGS])
+		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
+		if (!add_extension(id, cadt_flags, tb))
+			continue;
+		offset += ALIGN(len + offset, ip_set_extensions[id].align);
+		set->offset[id] = offset;
+		set->extensions |= ip_set_extensions[id].type;
+		offset += ip_set_extensions[id].len;
+	}
+	return len + offset;
+}
+EXPORT_SYMBOL_GPL(ip_set_elem_len);
+
 int
 ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 		      struct ip_set_ext *ext)
@@ -334,6 +399,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 			ext->packets = be64_to_cpu(nla_get_be64(
 						   tb[IPSET_ATTR_PACKETS]));
 	}
+	if (tb[IPSET_ATTR_COMMENT]) {
+		if (!(set->extensions & IPSET_EXT_COMMENT))
+			return -IPSET_ERR_COMMENT;
+		ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
@@ -374,13 +445,14 @@ __ip_set_put(struct ip_set *set)
  */
 
 static inline struct ip_set *
-ip_set_rcu_get(ip_set_id_t index)
+ip_set_rcu_get(struct net *net, ip_set_id_t index)
 {
 	struct ip_set *set;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
 	rcu_read_lock();
 	/* ip_set_list itself needs to be protected */
-	set = rcu_dereference(ip_set_list)[index];
+	set = rcu_dereference(inst->ip_set_list)[index];
 	rcu_read_unlock();
 
 	return set;
@@ -390,7 +462,8 @@ int
 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 	    const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(index);
+	struct ip_set *set = ip_set_rcu_get(
+			dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
 	BUG_ON(set == NULL);
@@ -428,7 +501,8 @@ int
 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(index);
+	struct ip_set *set = ip_set_rcu_get(
+			dev_net(par->in ? par->in : par->out), index);
 	int ret;
 
 	BUG_ON(set == NULL);
@@ -450,7 +524,8 @@ int
 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(index);
+	struct ip_set *set = ip_set_rcu_get(
+			dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
 	BUG_ON(set == NULL);
@@ -474,14 +549,15 @@ EXPORT_SYMBOL_GPL(ip_set_del);
  *
  */
 ip_set_id_t
-ip_set_get_byname(const char *name, struct ip_set **set)
+ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
 {
 	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
 	rcu_read_lock();
-	for (i = 0; i < ip_set_max; i++) {
-		s = rcu_dereference(ip_set_list)[i];
+	for (i = 0; i < inst->ip_set_max; i++) {
+		s = rcu_dereference(inst->ip_set_list)[i];
 		if (s != NULL && STREQ(s->name, name)) {
 			__ip_set_get(s);
 			index = i;
@@ -501,17 +577,26 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
  * to be valid, after calling this function.
  *
  */
-void
-ip_set_put_byindex(ip_set_id_t index)
+
+static inline void
+__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
 {
 	struct ip_set *set;
 
 	rcu_read_lock();
-	set = rcu_dereference(ip_set_list)[index];
+	set = rcu_dereference(inst->ip_set_list)[index];
 	if (set != NULL)
 		__ip_set_put(set);
 	rcu_read_unlock();
 }
+
+void
+ip_set_put_byindex(struct net *net, ip_set_id_t index)
+{
+	struct ip_set_net *inst = ip_set_pernet(net);
+
+	__ip_set_put_byindex(inst, index);
+}
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
 /*
@@ -522,9 +607,9 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex);
  *
  */
 const char *
-ip_set_name_byindex(ip_set_id_t index)
+ip_set_name_byindex(struct net *net, ip_set_id_t index)
 {
-	const struct ip_set *set = ip_set_rcu_get(index);
+	const struct ip_set *set = ip_set_rcu_get(net, index);
 
 	BUG_ON(set == NULL);
 	BUG_ON(set->ref == 0);
@@ -546,14 +631,15 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex);
  * The nfnl mutex is used in the function.
  */
 ip_set_id_t
-ip_set_nfnl_get(const char *name)
+ip_set_nfnl_get(struct net *net, const char *name)
 {
 	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-	for (i = 0; i < ip_set_max; i++) {
-		s = nfnl_set(i);
+	for (i = 0; i < inst->ip_set_max; i++) {
+		s = nfnl_set(inst, i);
 		if (s != NULL && STREQ(s->name, name)) {
 			__ip_set_get(s);
 			index = i;
@@ -573,15 +659,16 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
  * The nfnl mutex is used in the function.
  */
 ip_set_id_t
-ip_set_nfnl_get_byindex(ip_set_id_t index)
+ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
 {
 	struct ip_set *set;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
-	if (index > ip_set_max)
+	if (index > inst->ip_set_max)
 		return IPSET_INVALID_ID;
 
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-	set = nfnl_set(index);
+	set = nfnl_set(inst, index);
 	if (set)
 		__ip_set_get(set);
 	else
@@ -600,13 +687,17 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
  * The nfnl mutex is used in the function.
  */
 void
-ip_set_nfnl_put(ip_set_id_t index)
+ip_set_nfnl_put(struct net *net, ip_set_id_t index)
 {
 	struct ip_set *set;
+	struct ip_set_net *inst = ip_set_pernet(net);
+
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-	set = nfnl_set(index);
-	if (set != NULL)
-		__ip_set_put(set);
+	if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
+		set = nfnl_set(inst, index);
+		if (set != NULL)
+			__ip_set_put(set);
+	}
 	nfnl_unlock(NFNL_SUBSYS_IPSET);
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@@ -664,14 +755,14 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
 };
 
 static struct ip_set *
-find_set_and_id(const char *name, ip_set_id_t *id)
+find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
 {
 	struct ip_set *set = NULL;
 	ip_set_id_t i;
 
 	*id = IPSET_INVALID_ID;
-	for (i = 0; i < ip_set_max; i++) {
-		set = nfnl_set(i);
+	for (i = 0; i < inst->ip_set_max; i++) {
+		set = nfnl_set(inst, i);
 		if (set != NULL && STREQ(set->name, name)) {
 			*id = i;
 			break;
@@ -681,22 +772,23 @@ find_set_and_id(const char *name, ip_set_id_t *id)
 }
 
 static inline struct ip_set *
-find_set(const char *name)
+find_set(struct ip_set_net *inst, const char *name)
 {
 	ip_set_id_t id;
 
-	return find_set_and_id(name, &id);
+	return find_set_and_id(inst, name, &id);
 }
 
 static int
-find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
+find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
+	     struct ip_set **set)
 {
 	struct ip_set *s;
 	ip_set_id_t i;
 
 	*index = IPSET_INVALID_ID;
-	for (i = 0;  i < ip_set_max; i++) {
-		s = nfnl_set(i);
+	for (i = 0;  i < inst->ip_set_max; i++) {
+		s = nfnl_set(inst, i);
 		if (s == NULL) {
 			if (*index == IPSET_INVALID_ID)
 				*index = i;
@@ -725,6 +817,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
+	struct net *net = sock_net(ctnl);
+	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set, *clash = NULL;
 	ip_set_id_t index = IPSET_INVALID_ID;
 	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
@@ -783,7 +877,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		goto put_out;
 	}
 
-	ret = set->type->create(set, tb, flags);
+	ret = set->type->create(net, set, tb, flags);
 	if (ret != 0)
 		goto put_out;
 
@@ -794,7 +888,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	 * by the nfnl mutex. Find the first free index in ip_set_list
 	 * and check clashing.
 	 */
-	ret = find_free_id(set->name, &index, &clash);
+	ret = find_free_id(inst, set->name, &index, &clash);
 	if (ret == -EEXIST) {
 		/* If this is the same set and requested, ignore error */
 		if ((flags & IPSET_FLAG_EXIST) &&
@@ -807,9 +901,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		goto cleanup;
 	} else if (ret == -IPSET_ERR_MAX_SETS) {
 		struct ip_set **list, **tmp;
-		ip_set_id_t i = ip_set_max + IP_SET_INC;
+		ip_set_id_t i = inst->ip_set_max + IP_SET_INC;
 
-		if (i < ip_set_max || i == IPSET_INVALID_ID)
+		if (i < inst->ip_set_max || i == IPSET_INVALID_ID)
 			/* Wraparound */
 			goto cleanup;
 
@@ -817,14 +911,14 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		if (!list)
 			goto cleanup;
 		/* nfnl mutex is held, both lists are valid */
-		tmp = nfnl_dereference(ip_set_list);
-		memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max);
-		rcu_assign_pointer(ip_set_list, list);
+		tmp = nfnl_dereference(inst->ip_set_list);
+		memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
+		rcu_assign_pointer(inst->ip_set_list, list);
 		/* Make sure all current packets have passed through */
 		synchronize_net();
 		/* Use new list */
-		index = ip_set_max;
-		ip_set_max = i;
+		index = inst->ip_set_max;
+		inst->ip_set_max = i;
 		kfree(tmp);
 		ret = 0;
 	} else if (ret)
@@ -834,7 +928,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	 * Finally! Add our shiny new set to the list, and be done.
 	 */
 	pr_debug("create: '%s' created with index %u!\n", set->name, index);
-	nfnl_set(index) = set;
+	nfnl_set(inst, index) = set;
 
 	return ret;
 
@@ -857,12 +951,12 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
 };
 
 static void
-ip_set_destroy_set(ip_set_id_t index)
+ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
 {
-	struct ip_set *set = nfnl_set(index);
+	struct ip_set *set = nfnl_set(inst, index);
 
 	pr_debug("set: %s\n",  set->name);
-	nfnl_set(index) = NULL;
+	nfnl_set(inst, index) = NULL;
 
 	/* Must call it without holding any lock */
 	set->variant->destroy(set);
@@ -875,6 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	       const struct nlmsghdr *nlh,
 	       const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *s;
 	ip_set_id_t i;
 	int ret = 0;
@@ -894,21 +989,22 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	 */
 	read_lock_bh(&ip_set_ref_lock);
 	if (!attr[IPSET_ATTR_SETNAME]) {
-		for (i = 0; i < ip_set_max; i++) {
-			s = nfnl_set(i);
+		for (i = 0; i < inst->ip_set_max; i++) {
+			s = nfnl_set(inst, i);
 			if (s != NULL && s->ref) {
 				ret = -IPSET_ERR_BUSY;
 				goto out;
 			}
 		}
 		read_unlock_bh(&ip_set_ref_lock);
-		for (i = 0; i < ip_set_max; i++) {
-			s = nfnl_set(i);
+		for (i = 0; i < inst->ip_set_max; i++) {
+			s = nfnl_set(inst, i);
 			if (s != NULL)
-				ip_set_destroy_set(i);
+				ip_set_destroy_set(inst, i);
 		}
 	} else {
-		s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i);
+		s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+				    &i);
 		if (s == NULL) {
 			ret = -ENOENT;
 			goto out;
@@ -918,7 +1014,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 		}
 		read_unlock_bh(&ip_set_ref_lock);
 
-		ip_set_destroy_set(i);
+		ip_set_destroy_set(inst, i);
 	}
 	return 0;
 out:
@@ -943,6 +1039,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh,
 	     const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *s;
 	ip_set_id_t i;
 
@@ -950,13 +1047,13 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
 		return -IPSET_ERR_PROTOCOL;
 
 	if (!attr[IPSET_ATTR_SETNAME]) {
-		for (i = 0; i < ip_set_max; i++) {
-			s = nfnl_set(i);
+		for (i = 0; i < inst->ip_set_max; i++) {
+			s = nfnl_set(inst, i);
 			if (s != NULL)
 				ip_set_flush_set(s);
 		}
 	} else {
-		s = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+		s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 		if (s == NULL)
 			return -ENOENT;
 
@@ -982,6 +1079,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set, *s;
 	const char *name2;
 	ip_set_id_t i;
@@ -992,7 +1090,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME2] == NULL))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1003,8 +1101,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
-	for (i = 0; i < ip_set_max; i++) {
-		s = nfnl_set(i);
+	for (i = 0; i < inst->ip_set_max; i++) {
+		s = nfnl_set(inst, i);
 		if (s != NULL && STREQ(s->name, name2)) {
 			ret = -IPSET_ERR_EXIST_SETNAME2;
 			goto out;
@@ -1031,6 +1129,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *from, *to;
 	ip_set_id_t from_id, to_id;
 	char from_name[IPSET_MAXNAMELEN];
@@ -1040,11 +1139,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME2] == NULL))
 		return -IPSET_ERR_PROTOCOL;
 
-	from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id);
+	from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+			       &from_id);
 	if (from == NULL)
 		return -ENOENT;
 
-	to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id);
+	to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
+			     &to_id);
 	if (to == NULL)
 		return -IPSET_ERR_EXIST_SETNAME2;
 
@@ -1052,7 +1153,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	 * Not an artificial restriction anymore, as we must prevent
 	 * possible loops created by swapping in setlist type of sets. */
 	if (!(from->type->features == to->type->features &&
-	      from->type->family == to->type->family))
+	      from->family == to->family))
 		return -IPSET_ERR_TYPE_MISMATCH;
 
 	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
@@ -1061,8 +1162,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 
 	write_lock_bh(&ip_set_ref_lock);
 	swap(from->ref, to->ref);
-	nfnl_set(from_id) = to;
-	nfnl_set(to_id) = from;
+	nfnl_set(inst, from_id) = to;
+	nfnl_set(inst, to_id) = from;
 	write_unlock_bh(&ip_set_ref_lock);
 
 	return 0;
@@ -1081,9 +1182,12 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 static int
 ip_set_dump_done(struct netlink_callback *cb)
 {
-	if (cb->args[2]) {
-		pr_debug("release set %s\n", nfnl_set(cb->args[1])->name);
-		ip_set_put_byindex((ip_set_id_t) cb->args[1]);
+	struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
+	if (cb->args[IPSET_CB_ARG0]) {
+		pr_debug("release set %s\n",
+			 nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name);
+		__ip_set_put_byindex(inst,
+			(ip_set_id_t) cb->args[IPSET_CB_INDEX]);
 	}
 	return 0;
 }
@@ -1101,7 +1205,7 @@ dump_attrs(struct nlmsghdr *nlh)
 }
 
 static int
-dump_init(struct netlink_callback *cb)
+dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
 	int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
@@ -1114,21 +1218,22 @@ dump_init(struct netlink_callback *cb)
 	nla_parse(cda, IPSET_ATTR_CMD_MAX,
 		  attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
 
-	/* cb->args[0] : dump single set/all sets
-	 *         [1] : set index
-	 *         [..]: type specific
+	/* cb->args[IPSET_CB_NET]:	net namespace
+	 *         [IPSET_CB_DUMP]:	dump single set/all sets
+	 *         [IPSET_CB_INDEX]: 	set index
+	 *         [IPSET_CB_ARG0]:	type specific
 	 */
 
 	if (cda[IPSET_ATTR_SETNAME]) {
 		struct ip_set *set;
 
-		set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]),
+		set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
 				      &index);
 		if (set == NULL)
 			return -ENOENT;
 
 		dump_type = DUMP_ONE;
-		cb->args[1] = index;
+		cb->args[IPSET_CB_INDEX] = index;
 	} else
 		dump_type = DUMP_ALL;
 
@@ -1136,7 +1241,8 @@ dump_init(struct netlink_callback *cb)
 		u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
 		dump_type |= (f << 16);
 	}
-	cb->args[0] = dump_type;
+	cb->args[IPSET_CB_NET] = (unsigned long)inst;
+	cb->args[IPSET_CB_DUMP] = dump_type;
 
 	return 0;
 }
@@ -1148,11 +1254,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	struct ip_set *set = NULL;
 	struct nlmsghdr *nlh = NULL;
 	unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
+	struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
 	u32 dump_type, dump_flags;
 	int ret = 0;
 
-	if (!cb->args[0]) {
-		ret = dump_init(cb);
+	if (!cb->args[IPSET_CB_DUMP]) {
+		ret = dump_init(cb, inst);
 		if (ret < 0) {
 			nlh = nlmsg_hdr(cb->skb);
 			/* We have to create and send the error message
@@ -1163,18 +1270,19 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		}
 	}
 
-	if (cb->args[1] >= ip_set_max)
+	if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
 		goto out;
 
-	dump_type = DUMP_TYPE(cb->args[0]);
-	dump_flags = DUMP_FLAGS(cb->args[0]);
-	max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
+	dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]);
+	dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]);
+	max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1
+				    : inst->ip_set_max;
 dump_last:
-	pr_debug("args[0]: %u %u args[1]: %ld\n",
-		 dump_type, dump_flags, cb->args[1]);
-	for (; cb->args[1] < max; cb->args[1]++) {
-		index = (ip_set_id_t) cb->args[1];
-		set = nfnl_set(index);
+	pr_debug("dump type, flag: %u %u index: %ld\n",
+		 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
+	for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
+		index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
+		set = nfnl_set(inst, index);
 		if (set == NULL) {
 			if (dump_type == DUMP_ONE) {
 				ret = -ENOENT;
@@ -1190,7 +1298,7 @@ dump_last:
 		     !!(set->type->features & IPSET_DUMP_LAST)))
 			continue;
 		pr_debug("List set: %s\n", set->name);
-		if (!cb->args[2]) {
+		if (!cb->args[IPSET_CB_ARG0]) {
 			/* Start listing: make sure set won't be destroyed */
 			pr_debug("reference set\n");
 			__ip_set_get(set);
@@ -1207,7 +1315,7 @@ dump_last:
 			goto nla_put_failure;
 		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
 			goto next_set;
-		switch (cb->args[2]) {
+		switch (cb->args[IPSET_CB_ARG0]) {
 		case 0:
 			/* Core header data */
 			if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
@@ -1227,7 +1335,7 @@ dump_last:
 			read_lock_bh(&set->lock);
 			ret = set->variant->list(set, skb, cb);
 			read_unlock_bh(&set->lock);
-			if (!cb->args[2])
+			if (!cb->args[IPSET_CB_ARG0])
 				/* Set is done, proceed with next one */
 				goto next_set;
 			goto release_refcount;
@@ -1236,8 +1344,8 @@ dump_last:
 	/* If we dump all sets, continue with dumping last ones */
 	if (dump_type == DUMP_ALL) {
 		dump_type = DUMP_LAST;
-		cb->args[0] = dump_type | (dump_flags << 16);
-		cb->args[1] = 0;
+		cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
+		cb->args[IPSET_CB_INDEX] = 0;
 		goto dump_last;
 	}
 	goto out;
@@ -1246,15 +1354,15 @@ nla_put_failure:
 	ret = -EFAULT;
 next_set:
 	if (dump_type == DUMP_ONE)
-		cb->args[1] = IPSET_INVALID_ID;
+		cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID;
 	else
-		cb->args[1]++;
+		cb->args[IPSET_CB_INDEX]++;
 release_refcount:
 	/* If there was an error or set is done, release set */
-	if (ret || !cb->args[2]) {
-		pr_debug("release set %s\n", nfnl_set(index)->name);
-		ip_set_put_byindex(index);
-		cb->args[2] = 0;
+	if (ret || !cb->args[IPSET_CB_ARG0]) {
+		pr_debug("release set %s\n", nfnl_set(inst, index)->name);
+		__ip_set_put_byindex(inst, index);
+		cb->args[IPSET_CB_ARG0] = 0;
 	}
 out:
 	if (nlh) {
@@ -1356,6 +1464,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
 	const struct nlattr *nla;
@@ -1374,7 +1483,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 		       attr[IPSET_ATTR_LINENO] == NULL))))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1410,6 +1519,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
 	const struct nlattr *nla;
@@ -1428,7 +1538,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 		       attr[IPSET_ATTR_LINENO] == NULL))))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1464,6 +1574,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh,
 	     const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
 	int ret = 0;
@@ -1474,7 +1585,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 		     !flag_nested(attr[IPSET_ATTR_DATA])))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1489,8 +1600,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 	if (ret == -EAGAIN)
 		ret = 1;
 
-	return (ret < 0 && ret != -ENOTEMPTY) ? ret :
-		ret > 0 ? 0 : -IPSET_ERR_EXIST;
+	return ret > 0 ? 0 : -IPSET_ERR_EXIST;
 }
 
 /* Get headed data of a set */
@@ -1500,6 +1610,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	const struct ip_set *set;
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
@@ -1509,7 +1620,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME] == NULL))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1734,8 +1845,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 	unsigned int *op;
 	void *data;
 	int copylen = *len, ret = 0;
+	struct net *net = sock_net(sk);
+	struct ip_set_net *inst = ip_set_pernet(net);
 
-	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 	if (optval != SO_IP_SET)
 		return -EBADF;
@@ -1784,22 +1897,39 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		}
 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
 		nfnl_lock(NFNL_SUBSYS_IPSET);
-		find_set_and_id(req_get->set.name, &id);
+		find_set_and_id(inst, req_get->set.name, &id);
 		req_get->set.index = id;
 		nfnl_unlock(NFNL_SUBSYS_IPSET);
 		goto copy;
 	}
+	case IP_SET_OP_GET_FNAME: {
+		struct ip_set_req_get_set_family *req_get = data;
+		ip_set_id_t id;
+
+		if (*len != sizeof(struct ip_set_req_get_set_family)) {
+			ret = -EINVAL;
+			goto done;
+		}
+		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
+		nfnl_lock(NFNL_SUBSYS_IPSET);
+		find_set_and_id(inst, req_get->set.name, &id);
+		req_get->set.index = id;
+		if (id != IPSET_INVALID_ID)
+			req_get->family = nfnl_set(inst, id)->family;
+		nfnl_unlock(NFNL_SUBSYS_IPSET);
+		goto copy;
+	}
 	case IP_SET_OP_GET_BYINDEX: {
 		struct ip_set_req_get_set *req_get = data;
 		struct ip_set *set;
 
 		if (*len != sizeof(struct ip_set_req_get_set) ||
-		    req_get->set.index >= ip_set_max) {
+		    req_get->set.index >= inst->ip_set_max) {
 			ret = -EINVAL;
 			goto done;
 		}
 		nfnl_lock(NFNL_SUBSYS_IPSET);
-		set = nfnl_set(req_get->set.index);
+		set = nfnl_set(inst, req_get->set.index);
 		strncpy(req_get->set.name, set ? set->name : "",
 			IPSET_MAXNAMELEN);
 		nfnl_unlock(NFNL_SUBSYS_IPSET);
@@ -1828,49 +1958,81 @@ static struct nf_sockopt_ops so_set __read_mostly = {
 	.owner		= THIS_MODULE,
 };
 
-static int __init
-ip_set_init(void)
+static int __net_init
+ip_set_net_init(struct net *net)
 {
+	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set **list;
-	int ret;
 
-	if (max_sets)
-		ip_set_max = max_sets;
-	if (ip_set_max >= IPSET_INVALID_ID)
-		ip_set_max = IPSET_INVALID_ID - 1;
+	inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
+	if (inst->ip_set_max >= IPSET_INVALID_ID)
+		inst->ip_set_max = IPSET_INVALID_ID - 1;
 
-	list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
+	list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL);
 	if (!list)
 		return -ENOMEM;
+	inst->is_deleted = 0;
+	rcu_assign_pointer(inst->ip_set_list, list);
+	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
+	return 0;
+}
 
-	rcu_assign_pointer(ip_set_list, list);
-	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+static void __net_exit
+ip_set_net_exit(struct net *net)
+{
+	struct ip_set_net *inst = ip_set_pernet(net);
+
+	struct ip_set *set = NULL;
+	ip_set_id_t i;
+
+	inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
+
+	for (i = 0; i < inst->ip_set_max; i++) {
+		set = nfnl_set(inst, i);
+		if (set != NULL)
+			ip_set_destroy_set(inst, i);
+	}
+	kfree(rcu_dereference_protected(inst->ip_set_list, 1));
+}
+
+static struct pernet_operations ip_set_net_ops = {
+	.init	= ip_set_net_init,
+	.exit   = ip_set_net_exit,
+	.id	= &ip_set_net_id,
+	.size	= sizeof(struct ip_set_net)
+};
+
+
+static int __init
+ip_set_init(void)
+{
+	int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
 	if (ret != 0) {
 		pr_err("ip_set: cannot register with nfnetlink.\n");
-		kfree(list);
 		return ret;
 	}
 	ret = nf_register_sockopt(&so_set);
 	if (ret != 0) {
 		pr_err("SO_SET registry failed: %d\n", ret);
 		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-		kfree(list);
 		return ret;
 	}
-
-	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
+	ret = register_pernet_subsys(&ip_set_net_ops);
+	if (ret) {
+		pr_err("ip_set: cannot register pernet_subsys.\n");
+		nf_unregister_sockopt(&so_set);
+		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+		return ret;
+	}
 	return 0;
 }
 
 static void __exit
 ip_set_fini(void)
 {
-	struct ip_set **list = rcu_dereference_protected(ip_set_list, 1);
-
-	/* There can't be any existing set */
+	unregister_pernet_subsys(&ip_set_net_ops);
 	nf_unregister_sockopt(&so_set);
 	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-	kfree(list);
 	pr_debug("these are the famous last words\n");
 }
 
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 6fdf88a..29fb01d 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -102,9 +102,25 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
 	int protocol = iph->protocol;
 
 	/* See comments at tcp_match in ip_tables.c */
-	if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET))
+	if (protocol <= 0)
 		return false;
 
+	if (ntohs(iph->frag_off) & IP_OFFSET)
+		switch (protocol) {
+		case IPPROTO_TCP:
+		case IPPROTO_SCTP:
+		case IPPROTO_UDP:
+		case IPPROTO_UDPLITE:
+		case IPPROTO_ICMP:
+			/* Port info not available for fragment offset > 0 */
+			return false;
+		default:
+			/* Other protocols doesn't have ports,
+			   so we can match fragments */
+			*proto = protocol;
+			return true;
+		}
+
 	return get_port(skb, protocol, protooff, src, port, proto);
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);
@@ -116,12 +132,12 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
 {
 	int protoff;
 	u8 nexthdr;
-	__be16 frag_off;
+	__be16 frag_off = 0;
 
 	nexthdr = ipv6_hdr(skb)->nexthdr;
 	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
 				   &frag_off);
-	if (protoff < 0)
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
 		return false;
 
 	return get_port(skb, nexthdr, protoff, src, port, proto);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 57beb17..be6932a 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -15,8 +15,7 @@
 #define rcu_dereference_bh(p)	rcu_dereference(p)
 #endif
 
-#define CONCAT(a, b)		a##b
-#define TOKEN(a, b)		CONCAT(a, b)
+#define rcu_dereference_bh_nfnl(p)	rcu_dereference_bh_check(p, 1)
 
 /* Hashing which uses arrays to resolve clashing. The hash table is resized
  * (doubled) when searching becomes too long.
@@ -78,10 +77,14 @@ struct htable {
 
 #define hbucket(h, i)		(&((h)->bucket[i]))
 
+#ifndef IPSET_NET_COUNT
+#define IPSET_NET_COUNT		1
+#endif
+
 /* Book-keeping of the prefixes added to the set */
 struct net_prefixes {
-	u8 cidr;		/* the different cidr values in the set */
-	u32 nets;		/* number of elements per cidr */
+	u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */
+	u8 cidr[IPSET_NET_COUNT];  /* the different cidr values in the set */
 };
 
 /* Compute the hash table size */
@@ -114,23 +117,6 @@ htable_bits(u32 hashsize)
 	return bits;
 }
 
-/* Destroy the hashtable part of the set */
-static void
-ahash_destroy(struct htable *t)
-{
-	struct hbucket *n;
-	u32 i;
-
-	for (i = 0; i < jhash_size(t->htable_bits); i++) {
-		n = hbucket(t, i);
-		if (n->size)
-			/* FIXME: use slab cache */
-			kfree(n->value);
-	}
-
-	ip_set_free(t);
-}
-
 static int
 hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 {
@@ -156,30 +142,30 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 }
 
 #ifdef IP_SET_HASH_WITH_NETS
+#if IPSET_NET_COUNT > 1
+#define __CIDR(cidr, i)		(cidr[i])
+#else
+#define __CIDR(cidr, i)		(cidr)
+#endif
 #ifdef IP_SET_HASH_WITH_NETS_PACKED
 /* When cidr is packed with nomatch, cidr - 1 is stored in the entry */
-#define CIDR(cidr)		(cidr + 1)
+#define CIDR(cidr, i)		(__CIDR(cidr, i) + 1)
 #else
-#define CIDR(cidr)		(cidr)
+#define CIDR(cidr, i)		(__CIDR(cidr, i))
 #endif
 
 #define SET_HOST_MASK(family)	(family == AF_INET ? 32 : 128)
 
 #ifdef IP_SET_HASH_WITH_MULTI
-#define NETS_LENGTH(family)	(SET_HOST_MASK(family) + 1)
+#define NLEN(family)		(SET_HOST_MASK(family) + 1)
 #else
-#define NETS_LENGTH(family)	SET_HOST_MASK(family)
+#define NLEN(family)		SET_HOST_MASK(family)
 #endif
 
 #else
-#define NETS_LENGTH(family)	0
+#define NLEN(family)		0
 #endif /* IP_SET_HASH_WITH_NETS */
 
-#define ext_timeout(e, h)	\
-(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT])
-#define ext_counter(e, h)	\
-(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER])
-
 #endif /* _IP_SET_HASH_GEN_H */
 
 /* Family dependent templates */
@@ -194,6 +180,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #undef mtype_data_next
 #undef mtype_elem
 
+#undef mtype_ahash_destroy
+#undef mtype_ext_cleanup
 #undef mtype_add_cidr
 #undef mtype_del_cidr
 #undef mtype_ahash_memsize
@@ -220,41 +208,43 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 
 #undef HKEY
 
-#define mtype_data_equal	TOKEN(MTYPE, _data_equal)
+#define mtype_data_equal	IPSET_TOKEN(MTYPE, _data_equal)
 #ifdef IP_SET_HASH_WITH_NETS
-#define mtype_do_data_match	TOKEN(MTYPE, _do_data_match)
+#define mtype_do_data_match	IPSET_TOKEN(MTYPE, _do_data_match)
 #else
 #define mtype_do_data_match(d)	1
 #endif
-#define mtype_data_set_flags	TOKEN(MTYPE, _data_set_flags)
-#define mtype_data_reset_flags	TOKEN(MTYPE, _data_reset_flags)
-#define mtype_data_netmask	TOKEN(MTYPE, _data_netmask)
-#define mtype_data_list		TOKEN(MTYPE, _data_list)
-#define mtype_data_next		TOKEN(MTYPE, _data_next)
-#define mtype_elem		TOKEN(MTYPE, _elem)
-#define mtype_add_cidr		TOKEN(MTYPE, _add_cidr)
-#define mtype_del_cidr		TOKEN(MTYPE, _del_cidr)
-#define mtype_ahash_memsize	TOKEN(MTYPE, _ahash_memsize)
-#define mtype_flush		TOKEN(MTYPE, _flush)
-#define mtype_destroy		TOKEN(MTYPE, _destroy)
-#define mtype_gc_init		TOKEN(MTYPE, _gc_init)
-#define mtype_same_set		TOKEN(MTYPE, _same_set)
-#define mtype_kadt		TOKEN(MTYPE, _kadt)
-#define mtype_uadt		TOKEN(MTYPE, _uadt)
+#define mtype_data_set_flags	IPSET_TOKEN(MTYPE, _data_set_flags)
+#define mtype_data_reset_elem	IPSET_TOKEN(MTYPE, _data_reset_elem)
+#define mtype_data_reset_flags	IPSET_TOKEN(MTYPE, _data_reset_flags)
+#define mtype_data_netmask	IPSET_TOKEN(MTYPE, _data_netmask)
+#define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
+#define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
+#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_ahash_destroy	IPSET_TOKEN(MTYPE, _ahash_destroy)
+#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
+#define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
+#define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
+#define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
+#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
+#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)
 #define mtype			MTYPE
 
-#define mtype_elem		TOKEN(MTYPE, _elem)
-#define mtype_add		TOKEN(MTYPE, _add)
-#define mtype_del		TOKEN(MTYPE, _del)
-#define mtype_test_cidrs	TOKEN(MTYPE, _test_cidrs)
-#define mtype_test		TOKEN(MTYPE, _test)
-#define mtype_expire		TOKEN(MTYPE, _expire)
-#define mtype_resize		TOKEN(MTYPE, _resize)
-#define mtype_head		TOKEN(MTYPE, _head)
-#define mtype_list		TOKEN(MTYPE, _list)
-#define mtype_gc		TOKEN(MTYPE, _gc)
-#define mtype_variant		TOKEN(MTYPE, _variant)
-#define mtype_data_match	TOKEN(MTYPE, _data_match)
+#define mtype_add		IPSET_TOKEN(MTYPE, _add)
+#define mtype_del		IPSET_TOKEN(MTYPE, _del)
+#define mtype_test_cidrs	IPSET_TOKEN(MTYPE, _test_cidrs)
+#define mtype_test		IPSET_TOKEN(MTYPE, _test)
+#define mtype_expire		IPSET_TOKEN(MTYPE, _expire)
+#define mtype_resize		IPSET_TOKEN(MTYPE, _resize)
+#define mtype_head		IPSET_TOKEN(MTYPE, _head)
+#define mtype_list		IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
+#define mtype_variant		IPSET_TOKEN(MTYPE, _variant)
+#define mtype_data_match	IPSET_TOKEN(MTYPE, _data_match)
 
 #ifndef HKEY_DATALEN
 #define HKEY_DATALEN		sizeof(struct mtype_elem)
@@ -269,13 +259,10 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 
 /* The generic hash structure */
 struct htype {
-	struct htable *table;	/* the hash table */
+	struct htable __rcu *table; /* the hash table */
 	u32 maxelem;		/* max elements in the hash */
 	u32 elements;		/* current element (vs timeout) */
 	u32 initval;		/* random jhash init value */
-	u32 timeout;		/* timeout value, if enabled */
-	size_t dsize;		/* data struct size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
 	struct timer_list gc;	/* garbage collection when timeout enabled */
 	struct mtype_elem next; /* temporary storage for uadd */
 #ifdef IP_SET_HASH_WITH_MULTI
@@ -297,56 +284,60 @@ struct htype {
 /* Network cidr size book keeping when the hash stores different
  * sized networks */
 static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
+mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 {
 	int i, j;
 
 	/* Add in increasing prefix order, so larger cidr first */
-	for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) {
+	for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) {
 		if (j != -1)
 			continue;
-		else if (h->nets[i].cidr < cidr)
+		else if (h->nets[i].cidr[n] < cidr)
 			j = i;
-		else if (h->nets[i].cidr == cidr) {
-			h->nets[i].nets++;
+		else if (h->nets[i].cidr[n] == cidr) {
+			h->nets[i].nets[n]++;
 			return;
 		}
 	}
 	if (j != -1) {
 		for (; i > j; i--) {
-			h->nets[i].cidr = h->nets[i - 1].cidr;
-			h->nets[i].nets = h->nets[i - 1].nets;
+			h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
+			h->nets[i].nets[n] = h->nets[i - 1].nets[n];
 		}
 	}
-	h->nets[i].cidr = cidr;
-	h->nets[i].nets = 1;
+	h->nets[i].cidr[n] = cidr;
+	h->nets[i].nets[n] = 1;
 }
 
 static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
+mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 {
-	u8 i, j;
-
-	for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++)
-		;
-	h->nets[i].nets--;
-
-	if (h->nets[i].nets != 0)
-		return;
-
-	for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) {
-		h->nets[j].cidr = h->nets[j + 1].cidr;
-		h->nets[j].nets = h->nets[j + 1].nets;
+	u8 i, j, net_end = nets_length - 1;
+
+	for (i = 0; i < nets_length; i++) {
+	        if (h->nets[i].cidr[n] != cidr)
+	                continue;
+                if (h->nets[i].nets[n] > 1 || i == net_end ||
+                    h->nets[i + 1].nets[n] == 0) {
+                        h->nets[i].nets[n]--;
+                        return;
+                }
+                for (j = i; j < net_end && h->nets[j].nets[n]; j++) {
+		        h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
+		        h->nets[j].nets[n] = h->nets[j + 1].nets[n];
+                }
+                h->nets[j].nets[n] = 0;
+                return;
 	}
 }
 #endif
 
 /* Calculate the actual memory size of the set data */
 static size_t
-mtype_ahash_memsize(const struct htype *h, u8 nets_length)
+mtype_ahash_memsize(const struct htype *h, const struct htable *t,
+		    u8 nets_length, size_t dsize)
 {
 	u32 i;
-	struct htable *t = h->table;
 	size_t memsize = sizeof(*h)
 			 + sizeof(*t)
 #ifdef IP_SET_HASH_WITH_NETS
@@ -355,35 +346,70 @@ mtype_ahash_memsize(const struct htype *h, u8 nets_length)
 			 + jhash_size(t->htable_bits) * sizeof(struct hbucket);
 
 	for (i = 0; i < jhash_size(t->htable_bits); i++)
-		memsize += t->bucket[i].size * h->dsize;
+		memsize += t->bucket[i].size * dsize;
 
 	return memsize;
 }
 
+/* Get the ith element from the array block n */
+#define ahash_data(n, i, dsize)	\
+	((struct mtype_elem *)((n)->value + ((i) * (dsize))))
+
+static void
+mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
+{
+	int i;
+
+	for (i = 0; i < n->pos; i++)
+		ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
+}
+
 /* Flush a hash type of set: destroy all elements */
 static void
 mtype_flush(struct ip_set *set)
 {
 	struct htype *h = set->data;
-	struct htable *t = h->table;
+	struct htable *t;
 	struct hbucket *n;
 	u32 i;
 
+	t = rcu_dereference_bh_nfnl(h->table);
 	for (i = 0; i < jhash_size(t->htable_bits); i++) {
 		n = hbucket(t, i);
 		if (n->size) {
+			if (set->extensions & IPSET_EXT_DESTROY)
+				mtype_ext_cleanup(set, n);
 			n->size = n->pos = 0;
 			/* FIXME: use slab cache */
 			kfree(n->value);
 		}
 	}
 #ifdef IP_SET_HASH_WITH_NETS
-	memset(h->nets, 0, sizeof(struct net_prefixes)
-			   * NETS_LENGTH(set->family));
+	memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
 #endif
 	h->elements = 0;
 }
 
+/* Destroy the hashtable part of the set */
+static void
+mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
+{
+	struct hbucket *n;
+	u32 i;
+
+	for (i = 0; i < jhash_size(t->htable_bits); i++) {
+		n = hbucket(t, i);
+		if (n->size) {
+			if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
+				mtype_ext_cleanup(set, n);
+			/* FIXME: use slab cache */
+			kfree(n->value);
+		}
+	}
+
+	ip_set_free(t);
+}
+
 /* Destroy a hash type of set */
 static void
 mtype_destroy(struct ip_set *set)
@@ -393,7 +419,7 @@ mtype_destroy(struct ip_set *set)
 	if (set->extensions & IPSET_EXT_TIMEOUT)
 		del_timer_sync(&h->gc);
 
-	ahash_destroy(h->table);
+	mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true);
 #ifdef IP_SET_HASH_WITH_RBTREE
 	rbtree_destroy(&h->rbtree);
 #endif
@@ -410,10 +436,10 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	init_timer(&h->gc);
 	h->gc.data = (unsigned long) set;
 	h->gc.function = gc;
-	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+	h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&h->gc);
 	pr_debug("gc initialized, run in every %u\n",
-		 IPSET_GC_PERIOD(h->timeout));
+		 IPSET_GC_PERIOD(set->timeout));
 }
 
 static bool
@@ -424,37 +450,40 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 
 	/* Resizing changes htable_bits, so we ignore it */
 	return x->maxelem == y->maxelem &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 #ifdef IP_SET_HASH_WITH_NETMASK
 	       x->netmask == y->netmask &&
 #endif
 	       a->extensions == b->extensions;
 }
 
-/* Get the ith element from the array block n */
-#define ahash_data(n, i, dsize)	\
-	((struct mtype_elem *)((n)->value + ((i) * (dsize))))
-
 /* Delete expired elements from the hashtable */
 static void
-mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
+mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 {
-	struct htable *t = h->table;
+	struct htable *t;
 	struct hbucket *n;
 	struct mtype_elem *data;
 	u32 i;
 	int j;
+#ifdef IP_SET_HASH_WITH_NETS
+	u8 k;
+#endif
 
+	rcu_read_lock_bh();
+	t = rcu_dereference_bh(h->table);
 	for (i = 0; i < jhash_size(t->htable_bits); i++) {
 		n = hbucket(t, i);
 		for (j = 0; j < n->pos; j++) {
 			data = ahash_data(n, j, dsize);
-			if (ip_set_timeout_expired(ext_timeout(data, h))) {
+			if (ip_set_timeout_expired(ext_timeout(data, set))) {
 				pr_debug("expired %u/%u\n", i, j);
 #ifdef IP_SET_HASH_WITH_NETS
-				mtype_del_cidr(h, CIDR(data->cidr),
-					       nets_length);
+				for (k = 0; k < IPSET_NET_COUNT; k++)
+					mtype_del_cidr(h, CIDR(data->cidr, k),
+						       nets_length, k);
 #endif
+				ip_set_ext_destroy(set, data);
 				if (j != n->pos - 1)
 					/* Not last one */
 					memcpy(data,
@@ -477,6 +506,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
 			n->value = tmp;
 		}
 	}
+	rcu_read_unlock_bh();
 }
 
 static void
@@ -487,10 +517,10 @@ mtype_gc(unsigned long ul_set)
 
 	pr_debug("called\n");
 	write_lock_bh(&set->lock);
-	mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+	mtype_expire(set, h, NLEN(set->family), set->dsize);
 	write_unlock_bh(&set->lock);
 
-	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+	h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&h->gc);
 }
 
@@ -501,7 +531,7 @@ static int
 mtype_resize(struct ip_set *set, bool retried)
 {
 	struct htype *h = set->data;
-	struct htable *t, *orig = h->table;
+	struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table);
 	u8 htable_bits = orig->htable_bits;
 #ifdef IP_SET_HASH_WITH_NETS
 	u8 flags;
@@ -516,8 +546,7 @@ mtype_resize(struct ip_set *set, bool retried)
 	if (SET_WITH_TIMEOUT(set) && !retried) {
 		i = h->elements;
 		write_lock_bh(&set->lock);
-		mtype_expire(set->data, NETS_LENGTH(set->family),
-			     h->dsize);
+		mtype_expire(set, set->data, NLEN(set->family), set->dsize);
 		write_unlock_bh(&set->lock);
 		if (h->elements < i)
 			return 0;
@@ -544,25 +573,25 @@ retry:
 	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
 		n = hbucket(orig, i);
 		for (j = 0; j < n->pos; j++) {
-			data = ahash_data(n, j, h->dsize);
+			data = ahash_data(n, j, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 			flags = 0;
 			mtype_data_reset_flags(data, &flags);
 #endif
 			m = hbucket(t, HKEY(data, h->initval, htable_bits));
-			ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize);
+			ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize);
 			if (ret < 0) {
 #ifdef IP_SET_HASH_WITH_NETS
 				mtype_data_reset_flags(data, &flags);
 #endif
 				read_unlock_bh(&set->lock);
-				ahash_destroy(t);
+				mtype_ahash_destroy(set, t, false);
 				if (ret == -EAGAIN)
 					goto retry;
 				return ret;
 			}
-			d = ahash_data(m, m->pos++, h->dsize);
-			memcpy(d, data, h->dsize);
+			d = ahash_data(m, m->pos++, set->dsize);
+			memcpy(d, data, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 			mtype_data_reset_flags(d, &flags);
 #endif
@@ -577,7 +606,7 @@ retry:
 
 	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
 		 orig->htable_bits, orig, t->htable_bits, t);
-	ahash_destroy(orig);
+	mtype_ahash_destroy(set, orig, false);
 
 	return 0;
 }
@@ -600,7 +629,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
 		/* FIXME: when set is full, we slow down here */
-		mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+		mtype_expire(set, h, NLEN(set->family), set->dsize);
 
 	if (h->elements >= h->maxelem) {
 		if (net_ratelimit())
@@ -614,11 +643,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	key = HKEY(value, h->initval, t->htable_bits);
 	n = hbucket(t, key);
 	for (i = 0; i < n->pos; i++) {
-		data = ahash_data(n, i, h->dsize);
+		data = ahash_data(n, i, set->dsize);
 		if (mtype_data_equal(data, d, &multi)) {
 			if (flag_exist ||
 			    (SET_WITH_TIMEOUT(set) &&
-			     ip_set_timeout_expired(ext_timeout(data, h)))) {
+			     ip_set_timeout_expired(ext_timeout(data, set)))) {
 				/* Just the extensions could be overwritten */
 				j = i;
 				goto reuse_slot;
@@ -629,30 +658,37 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		}
 		/* Reuse first timed out entry */
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(data, h)) &&
+		    ip_set_timeout_expired(ext_timeout(data, set)) &&
 		    j != AHASH_MAX(h) + 1)
 			j = i;
 	}
 reuse_slot:
 	if (j != AHASH_MAX(h) + 1) {
 		/* Fill out reused slot */
-		data = ahash_data(n, j, h->dsize);
+		data = ahash_data(n, j, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
-		mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family));
-		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+		for (i = 0; i < IPSET_NET_COUNT; i++) {
+			mtype_del_cidr(h, CIDR(data->cidr, i),
+				       NLEN(set->family), i);
+			mtype_add_cidr(h, CIDR(d->cidr, i),
+				       NLEN(set->family), i);
+		}
 #endif
+		ip_set_ext_destroy(set, data);
 	} else {
 		/* Use/create a new slot */
 		TUNE_AHASH_MAX(h, multi);
-		ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize);
+		ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize);
 		if (ret != 0) {
 			if (ret == -EAGAIN)
 				mtype_data_next(&h->next, d);
 			goto out;
 		}
-		data = ahash_data(n, n->pos++, h->dsize);
+		data = ahash_data(n, n->pos++, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
-		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+		for (i = 0; i < IPSET_NET_COUNT; i++)
+			mtype_add_cidr(h, CIDR(d->cidr, i), NLEN(set->family),
+				       i);
 #endif
 		h->elements++;
 	}
@@ -661,9 +697,11 @@ reuse_slot:
 	mtype_data_set_flags(data, flags);
 #endif
 	if (SET_WITH_TIMEOUT(set))
-		ip_set_timeout_set(ext_timeout(data, h), ext->timeout);
+		ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
 	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(data, h), ext);
+		ip_set_init_counter(ext_counter(data, set), ext);
+	if (SET_WITH_COMMENT(set))
+		ip_set_init_comment(ext_comment(data, set), ext);
 
 out:
 	rcu_read_unlock_bh();
@@ -678,47 +716,60 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	  struct ip_set_ext *mext, u32 flags)
 {
 	struct htype *h = set->data;
-	struct htable *t = h->table;
+	struct htable *t;
 	const struct mtype_elem *d = value;
 	struct mtype_elem *data;
 	struct hbucket *n;
-	int i;
+	int i, ret = -IPSET_ERR_EXIST;
+#ifdef IP_SET_HASH_WITH_NETS
+	u8 j;
+#endif
 	u32 key, multi = 0;
 
+	rcu_read_lock_bh();
+	t = rcu_dereference_bh(h->table);
 	key = HKEY(value, h->initval, t->htable_bits);
 	n = hbucket(t, key);
 	for (i = 0; i < n->pos; i++) {
-		data = ahash_data(n, i, h->dsize);
+		data = ahash_data(n, i, set->dsize);
 		if (!mtype_data_equal(data, d, &multi))
 			continue;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(data, h)))
-			return -IPSET_ERR_EXIST;
+		    ip_set_timeout_expired(ext_timeout(data, set)))
+			goto out;
 		if (i != n->pos - 1)
 			/* Not last one */
-			memcpy(data, ahash_data(n, n->pos - 1, h->dsize),
-			       h->dsize);
+			memcpy(data, ahash_data(n, n->pos - 1, set->dsize),
+			       set->dsize);
 
 		n->pos--;
 		h->elements--;
 #ifdef IP_SET_HASH_WITH_NETS
-		mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+		for (j = 0; j < IPSET_NET_COUNT; j++)
+			mtype_del_cidr(h, CIDR(d->cidr, j), NLEN(set->family),
+				       j);
 #endif
+		ip_set_ext_destroy(set, data);
 		if (n->pos + AHASH_INIT_SIZE < n->size) {
 			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
-					    * h->dsize,
+					    * set->dsize,
 					    GFP_ATOMIC);
-			if (!tmp)
-				return 0;
+			if (!tmp) {
+				ret = 0;
+				goto out;
+			}
 			n->size -= AHASH_INIT_SIZE;
-			memcpy(tmp, n->value, n->size * h->dsize);
+			memcpy(tmp, n->value, n->size * set->dsize);
 			kfree(n->value);
 			n->value = tmp;
 		}
-		return 0;
+		ret = 0;
+		goto out;
 	}
 
-	return -IPSET_ERR_EXIST;
+out:
+	rcu_read_unlock_bh();
+	return ret;
 }
 
 static inline int
@@ -726,8 +777,7 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
 		 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
 {
 	if (SET_WITH_COUNTER(set))
-		ip_set_update_counter(ext_counter(data,
-						  (struct htype *)(set->data)),
+		ip_set_update_counter(ext_counter(data, set),
 				      ext, mext, flags);
 	return mtype_do_data_match(data);
 }
@@ -741,25 +791,38 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 		 struct ip_set_ext *mext, u32 flags)
 {
 	struct htype *h = set->data;
-	struct htable *t = h->table;
+	struct htable *t = rcu_dereference_bh(h->table);
 	struct hbucket *n;
 	struct mtype_elem *data;
+#if IPSET_NET_COUNT == 2
+	struct mtype_elem orig = *d;
+	int i, j = 0, k;
+#else
 	int i, j = 0;
+#endif
 	u32 key, multi = 0;
-	u8 nets_length = NETS_LENGTH(set->family);
+	u8 nets_length = NLEN(set->family);
 
 	pr_debug("test by nets\n");
-	for (; j < nets_length && h->nets[j].nets && !multi; j++) {
-		mtype_data_netmask(d, h->nets[j].cidr);
+	for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) {
+#if IPSET_NET_COUNT == 2
+		mtype_data_reset_elem(d, &orig);
+		mtype_data_netmask(d, h->nets[j].cidr[0], false);
+		for (k = 0; k < nets_length && h->nets[k].nets[1] && !multi;
+		     k++) {
+			mtype_data_netmask(d, h->nets[k].cidr[1], true);
+#else
+		mtype_data_netmask(d, h->nets[j].cidr[0]);
+#endif
 		key = HKEY(d, h->initval, t->htable_bits);
 		n = hbucket(t, key);
 		for (i = 0; i < n->pos; i++) {
-			data = ahash_data(n, i, h->dsize);
+			data = ahash_data(n, i, set->dsize);
 			if (!mtype_data_equal(data, d, &multi))
 				continue;
 			if (SET_WITH_TIMEOUT(set)) {
 				if (!ip_set_timeout_expired(
-							ext_timeout(data, h)))
+						ext_timeout(data, set)))
 					return mtype_data_match(data, ext,
 								mext, set,
 								flags);
@@ -770,6 +833,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 				return mtype_data_match(data, ext,
 							mext, set, flags);
 		}
+#if IPSET_NET_COUNT == 2
+		}
+#endif
 	}
 	return 0;
 }
@@ -781,30 +847,41 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	   struct ip_set_ext *mext, u32 flags)
 {
 	struct htype *h = set->data;
-	struct htable *t = h->table;
+	struct htable *t;
 	struct mtype_elem *d = value;
 	struct hbucket *n;
 	struct mtype_elem *data;
-	int i;
+	int i, ret = 0;
 	u32 key, multi = 0;
 
+	rcu_read_lock_bh();
+	t = rcu_dereference_bh(h->table);
 #ifdef IP_SET_HASH_WITH_NETS
 	/* If we test an IP address and not a network address,
 	 * try all possible network sizes */
-	if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
-		return mtype_test_cidrs(set, d, ext, mext, flags);
+	for (i = 0; i < IPSET_NET_COUNT; i++)
+		if (CIDR(d->cidr, i) != SET_HOST_MASK(set->family))
+			break;
+	if (i == IPSET_NET_COUNT) {
+		ret = mtype_test_cidrs(set, d, ext, mext, flags);
+		goto out;
+	}
 #endif
 
 	key = HKEY(d, h->initval, t->htable_bits);
 	n = hbucket(t, key);
 	for (i = 0; i < n->pos; i++) {
-		data = ahash_data(n, i, h->dsize);
+		data = ahash_data(n, i, set->dsize);
 		if (mtype_data_equal(data, d, &multi) &&
 		    !(SET_WITH_TIMEOUT(set) &&
-		      ip_set_timeout_expired(ext_timeout(data, h))))
-			return mtype_data_match(data, ext, mext, set, flags);
+		      ip_set_timeout_expired(ext_timeout(data, set)))) {
+			ret = mtype_data_match(data, ext, mext, set, flags);
+			goto out;
+		}
 	}
-	return 0;
+out:
+	rcu_read_unlock_bh();
+	return ret;
 }
 
 /* Reply a HEADER request: fill out the header part of the set */
@@ -812,18 +889,18 @@ static int
 mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
 	const struct htype *h = set->data;
+	const struct htable *t;
 	struct nlattr *nested;
 	size_t memsize;
 
-	read_lock_bh(&set->lock);
-	memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family));
-	read_unlock_bh(&set->lock);
+	t = rcu_dereference_bh_nfnl(h->table);
+	memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
 		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
-			  htonl(jhash_size(h->table->htable_bits))) ||
+			  htonl(jhash_size(t->htable_bits))) ||
 	    nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
 		goto nla_put_failure;
 #ifdef IP_SET_HASH_WITH_NETMASK
@@ -832,12 +909,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 		goto nla_put_failure;
 #endif
 	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
-	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
-	    ((set->extensions & IPSET_EXT_TIMEOUT) &&
-	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) ||
-	    ((set->extensions & IPSET_EXT_COUNTER) &&
-	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
-			   htonl(IPSET_FLAG_WITH_COUNTERS))))
+	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
+		goto nla_put_failure;
+	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
 	ipset_nest_end(skb, nested);
 
@@ -852,11 +926,11 @@ mtype_list(const struct ip_set *set,
 	   struct sk_buff *skb, struct netlink_callback *cb)
 {
 	const struct htype *h = set->data;
-	const struct htable *t = h->table;
+	const struct htable *t = rcu_dereference_bh_nfnl(h->table);
 	struct nlattr *atd, *nested;
 	const struct hbucket *n;
 	const struct mtype_elem *e;
-	u32 first = cb->args[2];
+	u32 first = cb->args[IPSET_CB_ARG0];
 	/* We assume that one hash bucket fills into one page */
 	void *incomplete;
 	int i;
@@ -865,20 +939,22 @@ mtype_list(const struct ip_set *set,
 	if (!atd)
 		return -EMSGSIZE;
 	pr_debug("list hash set %s\n", set->name);
-	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
+	for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
+	     cb->args[IPSET_CB_ARG0]++) {
 		incomplete = skb_tail_pointer(skb);
-		n = hbucket(t, cb->args[2]);
-		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
+		n = hbucket(t, cb->args[IPSET_CB_ARG0]);
+		pr_debug("cb->arg bucket: %lu, t %p n %p\n",
+			 cb->args[IPSET_CB_ARG0], t, n);
 		for (i = 0; i < n->pos; i++) {
-			e = ahash_data(n, i, h->dsize);
+			e = ahash_data(n, i, set->dsize);
 			if (SET_WITH_TIMEOUT(set) &&
-			    ip_set_timeout_expired(ext_timeout(e, h)))
+			    ip_set_timeout_expired(ext_timeout(e, set)))
 				continue;
 			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
-				 cb->args[2], n, i, e);
+				 cb->args[IPSET_CB_ARG0], n, i, e);
 			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 			if (!nested) {
-				if (cb->args[2] == first) {
+				if (cb->args[IPSET_CB_ARG0] == first) {
 					nla_nest_cancel(skb, atd);
 					return -EMSGSIZE;
 				} else
@@ -886,43 +962,37 @@ mtype_list(const struct ip_set *set,
 			}
 			if (mtype_data_list(skb, e))
 				goto nla_put_failure;
-			if (SET_WITH_TIMEOUT(set) &&
-			    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-					  htonl(ip_set_timeout_get(
-						ext_timeout(e, h)))))
-				goto nla_put_failure;
-			if (SET_WITH_COUNTER(set) &&
-			    ip_set_put_counter(skb, ext_counter(e, h)))
+			if (ip_set_put_extensions(skb, set, e, true))
 				goto nla_put_failure;
 			ipset_nest_end(skb, nested);
 		}
 	}
 	ipset_nest_end(skb, atd);
 	/* Set listing finished */
-	cb->args[2] = 0;
+	cb->args[IPSET_CB_ARG0] = 0;
 
 	return 0;
 
 nla_put_failure:
 	nlmsg_trim(skb, incomplete);
-	ipset_nest_end(skb, atd);
-	if (unlikely(first == cb->args[2])) {
+	if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
 		pr_warning("Can't list set %s: one bucket does not fit into "
 			   "a message. Please report it!\n", set->name);
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 		return -EMSGSIZE;
 	}
+	ipset_nest_end(skb, atd);
 	return 0;
 }
 
 static int
-TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
-	      const struct xt_action_param *par,
-	      enum ipset_adt adt, struct ip_set_adt_opt *opt);
+IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
+	    const struct xt_action_param *par,
+	    enum ipset_adt adt, struct ip_set_adt_opt *opt);
 
 static int
-TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
-	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
+IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
+	    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
 
 static const struct ip_set_type_variant mtype_variant = {
 	.kadt	= mtype_kadt,
@@ -942,16 +1012,17 @@ static const struct ip_set_type_variant mtype_variant = {
 
 #ifdef IP_SET_EMIT_CREATE
 static int
-TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
+IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
+			    struct nlattr *tb[], u32 flags)
 {
 	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
-	u32 cadt_flags = 0;
 	u8 hbits;
 #ifdef IP_SET_HASH_WITH_NETMASK
 	u8 netmask;
 #endif
 	size_t hsize;
 	struct HTYPE *h;
+	struct htable *t;
 
 	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
 		return -IPSET_ERR_INVALID_FAMILY;
@@ -1001,7 +1072,7 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	h->netmask = netmask;
 #endif
 	get_random_bytes(&h->initval, sizeof(h->initval));
-	h->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	hbits = htable_bits(hashsize);
 	hsize = htable_size(hbits);
@@ -1009,91 +1080,37 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		kfree(h);
 		return -ENOMEM;
 	}
-	h->table = ip_set_alloc(hsize);
-	if (!h->table) {
+	t = ip_set_alloc(hsize);
+	if (!t) {
 		kfree(h);
 		return -ENOMEM;
 	}
-	h->table->htable_bits = hbits;
+	t->htable_bits = hbits;
+	rcu_assign_pointer(h->table, t);
 
 	set->data = h;
-	if (set->family ==  NFPROTO_IPV4)
-		set->variant = &TOKEN(HTYPE, 4_variant);
-	else
-		set->variant = &TOKEN(HTYPE, 6_variant);
-
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			h->timeout =
-				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			if (set->family == NFPROTO_IPV4) {
-				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 4ct_elem));
-				h->offset[IPSET_OFFSET_TIMEOUT] =
-					offsetof(struct TOKEN(HTYPE, 4ct_elem),
-						 timeout);
-				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 4ct_elem),
-						 counter);
-				TOKEN(HTYPE, 4_gc_init)(set,
-					TOKEN(HTYPE, 4_gc));
-			} else {
-				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 6ct_elem));
-				h->offset[IPSET_OFFSET_TIMEOUT] =
-					offsetof(struct TOKEN(HTYPE, 6ct_elem),
-						 timeout);
-				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 6ct_elem),
-						 counter);
-				TOKEN(HTYPE, 6_gc_init)(set,
-					TOKEN(HTYPE, 6_gc));
-			}
-		} else {
-			if (set->family == NFPROTO_IPV4) {
-				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 4c_elem));
-				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 4c_elem),
-						 counter);
-			} else {
-				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 6c_elem));
-				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 6c_elem),
-						 counter);
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
-		if (set->family == NFPROTO_IPV4) {
-			h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem));
-			h->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct TOKEN(HTYPE, 4t_elem),
-					 timeout);
-			TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc));
-		} else {
-			h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem));
-			h->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct TOKEN(HTYPE, 6t_elem),
-					 timeout);
-			TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc));
-		}
+	if (set->family == NFPROTO_IPV4) {
+		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
+		set->dsize = ip_set_elem_len(set, tb,
+				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
 	} else {
+		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
+		set->dsize = ip_set_elem_len(set, tb,
+				sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		if (set->family == NFPROTO_IPV4)
-			h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem));
+			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
+				IPSET_TOKEN(HTYPE, 4_gc));
 		else
-			h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem));
+			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
+				IPSET_TOKEN(HTYPE, 6_gc));
 	}
 
 	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
-		 set->name, jhash_size(h->table->htable_bits),
-		 h->table->htable_bits, h->maxelem, set->data, h->table);
+		 set->name, jhash_size(t->htable_bits),
+		 t->htable_bits, h->maxelem, set->data, t);
 
 	return 0;
 }
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index c74e6e1..e65fc24 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -23,19 +23,20 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counters support */
+#define IPSET_TYPE_REV_MIN	0
+/*				1	   Counters support */
+#define IPSET_TYPE_REV_MAX	2	/* Comments support */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip");
 
 /* Type specific function prefix */
 #define HTYPE		hash_ip
 #define IP_SET_HASH_WITH_NETMASK
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_ip4_elem {
@@ -43,22 +44,6 @@ struct hash_ip4_elem {
 	__be32 ip;
 };
 
-struct hash_ip4t_elem {
-	__be32 ip;
-	unsigned long timeout;
-};
-
-struct hash_ip4c_elem {
-	__be32 ip;
-	struct ip_set_counter counter;
-};
-
-struct hash_ip4ct_elem {
-	__be32 ip;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -99,7 +84,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip4_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	__be32 ip;
 
 	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
@@ -118,8 +103,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip4_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
-	u32 ip, ip_to, hosts;
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip = 0, ip_to = 0, hosts;
 	int ret = 0;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -178,29 +163,13 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 /* Member elements */
 struct hash_ip6_elem {
 	union nf_inet_addr ip;
 };
 
-struct hash_ip6t_elem {
-	union nf_inet_addr ip;
-	unsigned long timeout;
-};
-
-struct hash_ip6c_elem {
-	union nf_inet_addr ip;
-	struct ip_set_counter counter;
-};
-
-struct hash_ip6ct_elem {
-	union nf_inet_addr ip;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -253,7 +222,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip6_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
 	hash_ip6_netmask(&e.ip, h->netmask);
@@ -270,7 +239,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip6_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -304,8 +273,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
@@ -324,6 +293,7 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 7a2d2bd..525a595 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -24,19 +24,20 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-#define REVISION_MAX	2 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+/*				2    Counters support added */
+#define IPSET_TYPE_REV_MAX	3 /* Comments support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port");
 
 /* Type specific function prefix */
 #define HTYPE		hash_ipport
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_ipport4_elem {
@@ -46,31 +47,6 @@ struct hash_ipport4_elem {
 	u8 padding;
 };
 
-struct hash_ipport4t_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipport4c_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipport4ct_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -116,10 +92,9 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
 		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -136,8 +111,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
-	u32 ip, ip_to, p = 0, port, port_to;
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip, ip_to = 0, p = 0, port, port_to;
 	bool with_ports = false;
 	int ret;
 
@@ -222,7 +197,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_ipport6_elem {
 	union nf_inet_addr ip;
@@ -231,31 +206,6 @@ struct hash_ipport6_elem {
 	u8 padding;
 };
 
-struct hash_ipport6t_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipport6c_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipport6ct_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -306,10 +256,9 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
 		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -326,7 +275,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	int ret;
@@ -396,8 +345,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
@@ -419,6 +368,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 34e8a1a..f563663 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -24,19 +24,20 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-#define REVISION_MAX	2 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+/*				2    Counters support added */
+#define IPSET_TYPE_REV_MAX	3 /* Comments support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,ip");
 
 /* Type specific function prefix */
 #define HTYPE		hash_ipportip
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements  */
 struct hash_ipportip4_elem {
@@ -47,34 +48,6 @@ struct hash_ipportip4_elem {
 	u8 padding;
 };
 
-struct hash_ipportip4t_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipportip4c_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportip4ct_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 static inline bool
 hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
 			  const struct hash_ipportip4_elem *ip2,
@@ -120,10 +93,9 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		    const struct xt_action_param *par,
 		    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -141,8 +113,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
-	u32 ip, ip_to, p = 0, port, port_to;
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip, ip_to = 0, p = 0, port, port_to;
 	bool with_ports = false;
 	int ret;
 
@@ -231,7 +203,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_ipportip6_elem {
 	union nf_inet_addr ip;
@@ -241,34 +213,6 @@ struct hash_ipportip6_elem {
 	u8 padding;
 };
 
-struct hash_ipportip6t_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipportip6c_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportip6ct_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -319,10 +263,9 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		    const struct xt_action_param *par,
 		    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -340,7 +283,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	int ret;
@@ -414,8 +357,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipportip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
@@ -437,6 +380,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index c6a5253..5d87fe8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -24,15 +24,16 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-/*			2    Range as input support for IPv4 added */
-/*			3    nomatch flag support added */
-#define REVISION_MAX	4 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+/*				2    Range as input support for IPv4 added */
+/*				3    nomatch flag support added */
+/*				4    Counters support added */
+#define IPSET_TYPE_REV_MAX	5 /* Comments support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,net");
 
 /* Type specific function prefix */
@@ -46,7 +47,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,net");
 #define IP_SET_HASH_WITH_PROTO
 #define IP_SET_HASH_WITH_NETS
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_ipportnet4_elem {
@@ -58,37 +59,6 @@ struct hash_ipportnet4_elem {
 	u8 proto;
 };
 
-struct hash_ipportnet4t_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	unsigned long timeout;
-};
-
-struct hash_ipportnet4c_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportnet4ct_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -170,9 +140,9 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -195,9 +165,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
-	u32 ip, ip_to, p = 0, port, port_to;
-	u32 ip2_from, ip2_to, ip2_last, ip2;
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+	u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
 	bool with_ports = false;
 	u8 cidr;
 	int ret;
@@ -260,7 +230,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		e.ip = htonl(ip);
 		e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1));
 		ret = adtfn(set, &e, &ext, &ext, flags);
-		return ip_set_enomatch(ret, flags, adt) ? 1 :
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
 		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -272,7 +242,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (ip > ip_to)
 			swap(ip, ip_to);
 	} else if (tb[IPSET_ATTR_CIDR]) {
-		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 
 		if (!cidr || cidr > 32)
 			return -IPSET_ERR_INVALID_CIDR;
@@ -306,9 +276,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 						       : port;
 		for (; p <= port_to; p++) {
 			e.port = htons(p);
-			ip2 = retried
-			      && ip == ntohl(h->next.ip)
-			      && p == ntohs(h->next.port)
+			ip2 = retried &&
+			      ip == ntohl(h->next.ip) &&
+			      p == ntohs(h->next.port)
 				? ntohl(h->next.ip2) : ip2_from;
 			while (!after(ip2, ip2_to)) {
 				e.ip2 = htonl(ip2);
@@ -328,7 +298,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_ipportnet6_elem {
 	union nf_inet_addr ip;
@@ -339,37 +309,6 @@ struct hash_ipportnet6_elem {
 	u8 proto;
 };
 
-struct hash_ipportnet6t_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	unsigned long timeout;
-};
-
-struct hash_ipportnet6c_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportnet6ct_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -454,9 +393,9 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -479,7 +418,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	u8 cidr;
@@ -544,7 +483,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &e, &ext, &ext, flags);
-		return ip_set_enomatch(ret, flags, adt) ? 1 :
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
 		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -574,8 +513,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
 			  IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipportnet_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
@@ -600,6 +539,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index da740ce..8295cf4 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -22,21 +22,22 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    Range as input support for IPv4 added */
-/*			2    nomatch flag support added */
-#define REVISION_MAX	3 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    Range as input support for IPv4 added */
+/*				2    nomatch flag support added */
+/*				3    Counters support added */
+#define IPSET_TYPE_REV_MAX	4 /* Comments support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net");
 
 /* Type specific function prefix */
 #define HTYPE		hash_net
 #define IP_SET_HASH_WITH_NETS
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements  */
 struct hash_net4_elem {
@@ -46,31 +47,6 @@ struct hash_net4_elem {
 	u8 cidr;
 };
 
-struct hash_net4t_elem {
-	__be32 ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	unsigned long timeout;
-};
-
-struct hash_net4c_elem {
-	__be32 ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-};
-
-struct hash_net4ct_elem {
-	__be32 ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -143,9 +119,9 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (e.cidr == 0)
 		return -EINVAL;
@@ -165,8 +141,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net4_elem e = { .cidr = HOST_MASK };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
-	u32 ip = 0, ip_to, last;
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip = 0, ip_to = 0, last;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -199,7 +175,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
 		e.ip = htonl(ip & ip_set_hostmask(e.cidr));
 		ret = adtfn(set, &e, &ext, &ext, flags);
-		return ip_set_enomatch(ret, flags, adt) ? 1 :
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret:
 		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -228,7 +204,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_net6_elem {
 	union nf_inet_addr ip;
@@ -237,31 +213,6 @@ struct hash_net6_elem {
 	u8 cidr;
 };
 
-struct hash_net6t_elem {
-	union nf_inet_addr ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	unsigned long timeout;
-};
-
-struct hash_net6c_elem {
-	union nf_inet_addr ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-};
-
-struct hash_net6ct_elem {
-	union nf_inet_addr ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -338,9 +289,9 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (e.cidr == 0)
 		return -EINVAL;
@@ -357,10 +308,9 @@ static int
 hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
 	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net6_elem e = { .cidr = HOST_MASK };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -396,7 +346,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	ret = adtfn(set, &e, &ext, &ext, flags);
 
-	return ip_set_enomatch(ret, flags, adt) ? 1 :
+	return ip_set_enomatch(ret, flags, adt, set) ? -ret :
 	       ip_set_eexist(ret, flags) ? 0 : ret;
 }
 
@@ -406,8 +356,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_net_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
@@ -425,6 +375,7 @@ static struct ip_set_type hash_net_type __read_mostly = {
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 84ae6f6..3f64a66 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -23,14 +23,15 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    nomatch flag support added */
-/*			2    /0 support added */
-#define REVISION_MAX	3 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    nomatch flag support added */
+/*				2    /0 support added */
+/*				3    Counters support added */
+#define IPSET_TYPE_REV_MAX	4 /* Comments support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net,iface");
 
 /* Interface name rbtree */
@@ -134,7 +135,7 @@ iface_add(struct rb_root *root, const char **iface)
 
 #define STREQ(a, b)	(strcmp(a, b) == 0)
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 struct hash_netiface4_elem_hashed {
 	__be32 ip;
@@ -144,7 +145,7 @@ struct hash_netiface4_elem_hashed {
 	u8 elem;
 };
 
-/* Member elements without timeout */
+/* Member elements */
 struct hash_netiface4_elem {
 	__be32 ip;
 	u8 physdev;
@@ -154,37 +155,6 @@ struct hash_netiface4_elem {
 	const char *iface;
 };
 
-struct hash_netiface4t_elem {
-	__be32 ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	unsigned long timeout;
-};
-
-struct hash_netiface4c_elem {
-	__be32 ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-};
-
-struct hash_netiface4ct_elem {
-	__be32 ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -265,10 +235,10 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	int ret;
 
 	if (e.cidr == 0)
@@ -319,8 +289,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
-	u32 ip = 0, ip_to, last;
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip = 0, ip_to = 0, last;
 	char iface[IFNAMSIZ];
 	int ret;
 
@@ -368,7 +338,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
 		e.ip = htonl(ip & ip_set_hostmask(e.cidr));
 		ret = adtfn(set, &e, &ext, &ext, flags);
-		return ip_set_enomatch(ret, flags, adt) ? 1 :
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
 		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -399,7 +369,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_netiface6_elem_hashed {
 	union nf_inet_addr ip;
@@ -418,37 +388,6 @@ struct hash_netiface6_elem {
 	const char *iface;
 };
 
-struct hash_netiface6t_elem {
-	union nf_inet_addr ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	unsigned long timeout;
-};
-
-struct hash_netiface6c_elem {
-	union nf_inet_addr ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-};
-
-struct hash_netiface6ct_elem {
-	union nf_inet_addr ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -534,10 +473,10 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	int ret;
 
 	if (e.cidr == 0)
@@ -584,7 +523,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	char iface[IFNAMSIZ];
 	int ret;
 
@@ -634,7 +573,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	ret = adtfn(set, &e, &ext, &ext, flags);
 
-	return ip_set_enomatch(ret, flags, adt) ? 1 :
+	return ip_set_enomatch(ret, flags, adt, set) ? -ret :
 	       ip_set_eexist(ret, flags) ? 0 : ret;
 }
 
@@ -645,8 +584,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 			  IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_netiface_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
@@ -668,6 +607,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
new file mode 100644
index 0000000..2bc2dec
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -0,0 +1,481 @@
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
+IP_SET_MODULE_DESC("hash:net,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:net,net");
+
+/* Type specific function prefix */
+#define HTYPE		hash_netnet
+#define IP_SET_HASH_WITH_NETS
+#define IPSET_NET_COUNT 2
+
+/* IPv4 variants */
+
+/* Member elements  */
+struct hash_netnet4_elem {
+	union {
+		__be32 ip[2];
+		__be64 ipcmp;
+	};
+	u8 nomatch;
+	union {
+		u8 cidr[2];
+		u16 ccmp;
+	};
+};
+
+/* Common functions */
+
+static inline bool
+hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
+		     const struct hash_netnet4_elem *ip2,
+		     u32 *multi)
+{
+	return ip1->ipcmp == ip2->ipcmp &&
+	       ip2->ccmp == ip2->ccmp;
+}
+
+static inline int
+hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem)
+{
+	return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static inline void
+hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags)
+{
+	elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static inline void
+hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
+{
+	swap(*flags, elem->nomatch);
+}
+
+static inline void
+hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
+			  struct hash_netnet4_elem *orig)
+{
+	elem->ip[1] = orig->ip[1];
+}
+
+static inline void
+hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
+{
+	if (inner) {
+		elem->ip[1] &= ip_set_netmask(cidr);
+		elem->cidr[1] = cidr;
+	} else {
+		elem->ip[0] &= ip_set_netmask(cidr);
+		elem->cidr[0] = cidr;
+	}
+}
+
+static bool
+hash_netnet4_data_list(struct sk_buff *skb,
+		    const struct hash_netnet4_elem *data)
+{
+	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) ||
+	    nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) ||
+	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+	    nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+	    (flags &&
+	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static inline void
+hash_netnet4_data_next(struct hash_netnet4_elem *next,
+		    const struct hash_netnet4_elem *d)
+{
+	next->ipcmp = d->ipcmp;
+}
+
+#define MTYPE		hash_netnet4
+#define PF		4
+#define HOST_MASK	32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+	       const struct xt_action_param *par,
+	       enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+	const struct hash_netnet *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netnet4_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+	e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+	e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+	if (adt == IPSET_TEST)
+		e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
+
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1]);
+	e.ip[0] &= ip_set_netmask(e.cidr[0]);
+	e.ip[1] &= ip_set_netmask(e.cidr[1]);
+
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+	const struct hash_netnet *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netnet4_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip = 0, ip_to = 0, last;
+	u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
+	u8 cidr, cidr2;
+	int ret;
+
+	e.cidr[0] = e.cidr[1] = HOST_MASK;
+	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+	      ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
+	      ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR]) {
+		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+		if (!cidr || cidr > HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+		e.cidr[0] = cidr;
+	}
+
+	if (tb[IPSET_ATTR_CIDR2]) {
+		cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+		if (!cidr2 || cidr2 > HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+		e.cidr[1] = cidr2;
+	}
+
+	if (tb[IPSET_ATTR_CADT_FLAGS]) {
+		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+		if (cadt_flags & IPSET_FLAG_NOMATCH)
+			flags |= (IPSET_FLAG_NOMATCH << 16);
+	}
+
+	if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] &&
+				   tb[IPSET_ATTR_IP2_TO])) {
+		e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0]));
+		e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1]));
+		ret = adtfn(set, &e, &ext, &ext, flags);
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+		       ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	ip_to = ip;
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		if (ip_to < ip)
+			swap(ip, ip_to);
+		if (ip + UINT_MAX == ip_to)
+			return -IPSET_ERR_HASH_RANGE;
+	}
+
+	ip2_to = ip2_from;
+	if (tb[IPSET_ATTR_IP2_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+		if (ret)
+			return ret;
+		if (ip2_to < ip2_from)
+			swap(ip2_from, ip2_to);
+		if (ip2_from + UINT_MAX == ip2_to)
+			return -IPSET_ERR_HASH_RANGE;
+
+	}
+
+	if (retried)
+		ip = ntohl(h->next.ip[0]);
+
+	while (!after(ip, ip_to)) {
+		e.ip[0] = htonl(ip);
+		last = ip_set_range_to_cidr(ip, ip_to, &cidr);
+		e.cidr[0] = cidr;
+		ip2 = (retried &&
+		       ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
+						   : ip2_from;
+		while (!after(ip2, ip2_to)) {
+			e.ip[1] = htonl(ip2);
+			last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2);
+			e.cidr[1] = cidr2;
+			ret = adtfn(set, &e, &ext, &ext, flags);
+			if (ret && !ip_set_eexist(ret, flags))
+				return ret;
+			else
+				ret = 0;
+			ip2 = last2 + 1;
+		}
+		ip = last + 1;
+	}
+	return ret;
+}
+
+/* IPv6 variants */
+
+struct hash_netnet6_elem {
+	union nf_inet_addr ip[2];
+	u8 nomatch;
+	union {
+		u8 cidr[2];
+		u16 ccmp;
+	};
+};
+
+/* Common functions */
+
+static inline bool
+hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
+		     const struct hash_netnet6_elem *ip2,
+		     u32 *multi)
+{
+	return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
+	       ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
+	       ip1->ccmp == ip2->ccmp;
+}
+
+static inline int
+hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem)
+{
+	return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static inline void
+hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags)
+{
+	elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static inline void
+hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
+{
+	swap(*flags, elem->nomatch);
+}
+
+static inline void
+hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
+			  struct hash_netnet6_elem *orig)
+{
+	elem->ip[1] = orig->ip[1];
+}
+
+static inline void
+hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
+{
+	if (inner) {
+		ip6_netmask(&elem->ip[1], cidr);
+		elem->cidr[1] = cidr;
+	} else {
+		ip6_netmask(&elem->ip[0], cidr);
+		elem->cidr[0] = cidr;
+	}
+}
+
+static bool
+hash_netnet6_data_list(struct sk_buff *skb,
+		    const struct hash_netnet6_elem *data)
+{
+	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) ||
+	    nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) ||
+	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+	    nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+	    (flags &&
+	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static inline void
+hash_netnet6_data_next(struct hash_netnet4_elem *next,
+		    const struct hash_netnet6_elem *d)
+{
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+
+#define MTYPE		hash_netnet6
+#define PF		6
+#define HOST_MASK	128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
+	       const struct xt_action_param *par,
+	       enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+	const struct hash_netnet *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netnet6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+	e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+	e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+	if (adt == IPSET_TEST)
+		e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK;
+
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6);
+	ip6_netmask(&e.ip[0], e.cidr[0]);
+	ip6_netmask(&e.ip[1], e.cidr[1]);
+
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
+	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netnet6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	int ret;
+
+	e.cidr[0] = e.cidr[1] = HOST_MASK;
+	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+		return -IPSET_ERR_PROTOCOL;
+	if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
+		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
+	      ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
+	      ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR])
+		e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+	if (tb[IPSET_ATTR_CIDR2])
+		e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+
+	if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
+	    e.cidr[1] > HOST_MASK)
+		return -IPSET_ERR_INVALID_CIDR;
+
+	ip6_netmask(&e.ip[0], e.cidr[0]);
+	ip6_netmask(&e.ip[1], e.cidr[1]);
+
+	if (tb[IPSET_ATTR_CADT_FLAGS]) {
+		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+		if (cadt_flags & IPSET_FLAG_NOMATCH)
+			flags |= (IPSET_FLAG_NOMATCH << 16);
+	}
+
+	ret = adtfn(set, &e, &ext, &ext, flags);
+
+	return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+	       ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static struct ip_set_type hash_netnet_type __read_mostly = {
+	.name		= "hash:net,net",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_IP2 | IPSET_TYPE_NOMATCH,
+	.dimension	= IPSET_DIM_TWO,
+	.family		= NFPROTO_UNSPEC,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create		= hash_netnet_create,
+	.create_policy	= {
+		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP2]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP2_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_CIDR2]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
+		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
+		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+hash_netnet_init(void)
+{
+	return ip_set_type_register(&hash_netnet_type);
+}
+
+static void __exit
+hash_netnet_fini(void)
+{
+	ip_set_type_unregister(&hash_netnet_type);
+}
+
+module_init(hash_netnet_init);
+module_exit(hash_netnet_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 9a08698..7097fb0 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -23,15 +23,16 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-/*			2    Range as input support for IPv4 added */
-/*			3    nomatch flag support added */
-#define REVISION_MAX	4 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+/*				2    Range as input support for IPv4 added */
+/*				3    nomatch flag support added */
+/*				4    Counters support added */
+#define IPSET_TYPE_REV_MAX	5 /* Comments support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net,port");
 
 /* Type specific function prefix */
@@ -45,7 +46,7 @@ MODULE_ALIAS("ip_set_hash:net,port");
  */
 #define IP_SET_HASH_WITH_NETS_PACKED
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_netport4_elem {
@@ -56,34 +57,6 @@ struct hash_netport4_elem {
 	u8 nomatch:1;
 };
 
-struct hash_netport4t_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	unsigned long timeout;
-};
-
-struct hash_netport4c_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-};
-
-struct hash_netport4ct_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -162,9 +135,9 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -186,8 +159,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
-	u32 port, port_to, p = 0, ip = 0, ip_to, last;
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 port, port_to, p = 0, ip = 0, ip_to = 0, last;
 	bool with_ports = false;
 	u8 cidr;
 	int ret;
@@ -244,7 +217,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) {
 		e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1));
 		ret = adtfn(set, &e, &ext, &ext, flags);
-		return ip_set_enomatch(ret, flags, adt) ? 1 :
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
 		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -287,7 +260,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_netport6_elem {
 	union nf_inet_addr ip;
@@ -297,34 +270,6 @@ struct hash_netport6_elem {
 	u8 nomatch:1;
 };
 
-struct hash_netport6t_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	unsigned long timeout;
-};
-
-struct hash_netport6c_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-};
-
-struct hash_netport6ct_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -407,9 +352,9 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1,
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -431,7 +376,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport6_elem e = { .cidr = HOST_MASK  - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	u8 cidr;
@@ -489,7 +434,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &e, &ext, &ext, flags);
-		return ip_set_enomatch(ret, flags, adt) ? 1 :
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
 		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -518,8 +463,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_netport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
@@ -542,6 +487,7 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
new file mode 100644
index 0000000..703d119
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -0,0 +1,586 @@
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,port,net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	0 /* Comments support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
+IP_SET_MODULE_DESC("hash:net,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:net,port,net");
+
+/* Type specific function prefix */
+#define HTYPE		hash_netportnet
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
+#define IPSET_NET_COUNT 2
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_netportnet4_elem {
+	union {
+		__be32 ip[2];
+		__be64 ipcmp;
+	};
+	__be16 port;
+	union {
+		u8 cidr[2];
+		u16 ccmp;
+	};
+	u8 nomatch:1;
+	u8 proto;
+};
+
+/* Common functions */
+
+static inline bool
+hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
+			   const struct hash_netportnet4_elem *ip2,
+			   u32 *multi)
+{
+	return ip1->ipcmp == ip2->ipcmp &&
+	       ip1->ccmp == ip2->ccmp &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto;
+}
+
+static inline int
+hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem)
+{
+	return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static inline void
+hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags)
+{
+	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static inline void
+hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
+{
+	swap(*flags, elem->nomatch);
+}
+
+static inline void
+hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
+				struct hash_netportnet4_elem *orig)
+{
+	elem->ip[1] = orig->ip[1];
+}
+
+static inline void
+hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
+			      u8 cidr, bool inner)
+{
+	if (inner) {
+		elem->ip[1] &= ip_set_netmask(cidr);
+		elem->cidr[1] = cidr;
+	} else {
+		elem->ip[0] &= ip_set_netmask(cidr);
+		elem->cidr[0] = cidr;
+	}
+}
+
+static bool
+hash_netportnet4_data_list(struct sk_buff *skb,
+			  const struct hash_netportnet4_elem *data)
+{
+	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) ||
+	    nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) ||
+	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+	    nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+	    (flags &&
+	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static inline void
+hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
+			  const struct hash_netportnet4_elem *d)
+{
+	next->ipcmp = d->ipcmp;
+	next->port = d->port;
+}
+
+#define MTYPE		hash_netportnet4
+#define PF		4
+#define HOST_MASK	32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+		     const struct xt_action_param *par,
+		     enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+	const struct hash_netportnet *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netportnet4_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+	e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+	e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+	if (adt == IPSET_TEST)
+		e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
+
+	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+				 &e.port, &e.proto))
+		return -EINVAL;
+
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1]);
+	e.ip[0] &= ip_set_netmask(e.cidr[0]);
+	e.ip[1] &= ip_set_netmask(e.cidr[1]);
+
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+	const struct hash_netportnet *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netportnet4_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
+	u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
+	bool with_ports = false;
+	u8 cidr, cidr2;
+	int ret;
+
+	e.cidr[0] = e.cidr[1] = HOST_MASK;
+	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+	      ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
+	      ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR]) {
+		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+		if (!cidr || cidr > HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+		e.cidr[0] = cidr;
+	}
+
+	if (tb[IPSET_ATTR_CIDR2]) {
+		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+		if (!cidr || cidr > HOST_MASK)
+			return -IPSET_ERR_INVALID_CIDR;
+		e.cidr[1] = cidr;
+	}
+
+	if (tb[IPSET_ATTR_PORT])
+		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(e.proto);
+
+		if (e.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	if (!(with_ports || e.proto == IPPROTO_ICMP))
+		e.port = 0;
+
+	if (tb[IPSET_ATTR_CADT_FLAGS]) {
+		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+		if (cadt_flags & IPSET_FLAG_NOMATCH)
+			flags |= (IPSET_FLAG_NOMATCH << 16);
+	}
+
+	with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
+	if (adt == IPSET_TEST ||
+	    !(tb[IPSET_ATTR_IP_TO] || with_ports || tb[IPSET_ATTR_IP2_TO])) {
+		e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0]));
+		e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1]));
+		ret = adtfn(set, &e, &ext, &ext, flags);
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+		       ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	ip_to = ip;
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		if (ip > ip_to)
+			swap(ip, ip_to);
+		if (unlikely(ip + UINT_MAX == ip_to))
+			return -IPSET_ERR_HASH_RANGE;
+	}
+
+	port_to = port = ntohs(e.port);
+	if (tb[IPSET_ATTR_PORT_TO]) {
+		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+		if (port > port_to)
+			swap(port, port_to);
+	}
+
+	ip2_to = ip2_from;
+	if (tb[IPSET_ATTR_IP2_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+		if (ret)
+			return ret;
+		if (ip2_from > ip2_to)
+			swap(ip2_from, ip2_to);
+		if (unlikely(ip2_from + UINT_MAX == ip2_to))
+			return -IPSET_ERR_HASH_RANGE;
+	}
+
+	if (retried)
+		ip = ntohl(h->next.ip[0]);
+
+	while (!after(ip, ip_to)) {
+		e.ip[0] = htonl(ip);
+		ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr);
+		e.cidr[0] = cidr;
+		p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
+							  : port;
+		for (; p <= port_to; p++) {
+			e.port = htons(p);
+			ip2 = (retried && ip == ntohl(h->next.ip[0]) &&
+			       p == ntohs(h->next.port)) ? ntohl(h->next.ip[1])
+							 : ip2_from;
+			while (!after(ip2, ip2_to)) {
+				e.ip[1] = htonl(ip2);
+				ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
+								&cidr2);
+				e.cidr[1] = cidr2;
+				ret = adtfn(set, &e, &ext, &ext, flags);
+				if (ret && !ip_set_eexist(ret, flags))
+					return ret;
+				else
+					ret = 0;
+				ip2 = ip2_last + 1;
+			}
+		}
+		ip = ip_last + 1;
+	}
+	return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_netportnet6_elem {
+	union nf_inet_addr ip[2];
+	__be16 port;
+	union {
+		u8 cidr[2];
+		u16 ccmp;
+	};
+	u8 nomatch:1;
+	u8 proto;
+};
+
+/* Common functions */
+
+static inline bool
+hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
+			   const struct hash_netportnet6_elem *ip2,
+			   u32 *multi)
+{
+	return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
+	       ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
+	       ip1->ccmp == ip2->ccmp &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto;
+}
+
+static inline int
+hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem)
+{
+	return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static inline void
+hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags)
+{
+	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static inline void
+hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
+{
+	swap(*flags, elem->nomatch);
+}
+
+static inline void
+hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
+				struct hash_netportnet6_elem *orig)
+{
+	elem->ip[1] = orig->ip[1];
+}
+
+static inline void
+hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
+			      u8 cidr, bool inner)
+{
+	if (inner) {
+		ip6_netmask(&elem->ip[1], cidr);
+		elem->cidr[1] = cidr;
+	} else {
+		ip6_netmask(&elem->ip[0], cidr);
+		elem->cidr[0] = cidr;
+	}
+}
+
+static bool
+hash_netportnet6_data_list(struct sk_buff *skb,
+			  const struct hash_netportnet6_elem *data)
+{
+	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) ||
+	    nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) ||
+	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+	    nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+	    (flags &&
+	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static inline void
+hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
+			  const struct hash_netportnet6_elem *d)
+{
+	next->port = d->port;
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+
+#define MTYPE		hash_netportnet6
+#define PF		6
+#define HOST_MASK	128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
+		     const struct xt_action_param *par,
+		     enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+	const struct hash_netportnet *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netportnet6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+	e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+	e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+	if (adt == IPSET_TEST)
+		e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
+
+	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+				 &e.port, &e.proto))
+		return -EINVAL;
+
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1].in6);
+	ip6_netmask(&e.ip[0], e.cidr[0]);
+	ip6_netmask(&e.ip[1], e.cidr[1]);
+
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
+		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+	const struct hash_netportnet *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netportnet6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 port, port_to;
+	bool with_ports = false;
+	int ret;
+
+	e.cidr[0] = e.cidr[1] = HOST_MASK;
+	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+		return -IPSET_ERR_PROTOCOL;
+	if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
+		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
+	      ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
+	      ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR])
+		e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+	if (tb[IPSET_ATTR_CIDR2])
+		e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+
+	if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
+		     e.cidr[1] > HOST_MASK))
+		return -IPSET_ERR_INVALID_CIDR;
+
+	ip6_netmask(&e.ip[0], e.cidr[0]);
+	ip6_netmask(&e.ip[1], e.cidr[1]);
+
+	if (tb[IPSET_ATTR_PORT])
+		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(e.proto);
+
+		if (e.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+		e.port = 0;
+
+	if (tb[IPSET_ATTR_CADT_FLAGS]) {
+		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+		if (cadt_flags & IPSET_FLAG_NOMATCH)
+			flags |= (IPSET_FLAG_NOMATCH << 16);
+	}
+
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
+		ret = adtfn(set, &e, &ext, &ext, flags);
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+		       ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	port = ntohs(e.port);
+	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+	if (port > port_to)
+		swap(port, port_to);
+
+	if (retried)
+		port = ntohs(h->next.port);
+	for (; port <= port_to; port++) {
+		e.port = htons(port);
+		ret = adtfn(set, &e, &ext, &ext, flags);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+static struct ip_set_type hash_netportnet_type __read_mostly = {
+	.name		= "hash:net,port,net",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 |
+			  IPSET_TYPE_NOMATCH,
+	.dimension	= IPSET_DIM_THREE,
+	.family		= NFPROTO_UNSPEC,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create		= hash_netportnet_create,
+	.create_policy	= {
+		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP2]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP2_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
+		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_CIDR2]	= { .type = NLA_U8 },
+		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
+		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
+		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+hash_netportnet_init(void)
+{
+	return ip_set_type_register(&hash_netportnet_type);
+}
+
+static void __exit
+hash_netportnet_fini(void)
+{
+	ip_set_type_unregister(&hash_netportnet_type);
+}
+
+module_init(hash_netportnet_init);
+module_exit(hash_netportnet_fini);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 979b8c9..3e2317f 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -15,12 +15,13 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_list.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    Counters support added */
+#define IPSET_TYPE_REV_MAX	2 /* Comments support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_list:set");
 
 /* Member elements  */
@@ -28,28 +29,6 @@ struct set_elem {
 	ip_set_id_t id;
 };
 
-struct sett_elem {
-	struct {
-		ip_set_id_t id;
-	} __attribute__ ((aligned));
-	unsigned long timeout;
-};
-
-struct setc_elem {
-	struct {
-		ip_set_id_t id;
-	} __attribute__ ((aligned));
-	struct ip_set_counter counter;
-};
-
-struct setct_elem {
-	struct {
-		ip_set_id_t id;
-	} __attribute__ ((aligned));
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 struct set_adt_elem {
 	ip_set_id_t id;
 	ip_set_id_t refid;
@@ -58,24 +37,14 @@ struct set_adt_elem {
 
 /* Type structure */
 struct list_set {
-	size_t dsize;		/* element size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
 	u32 size;		/* size of set list array */
-	u32 timeout;		/* timeout value */
 	struct timer_list gc;	/* garbage collection */
+	struct net *net;	/* namespace */
 	struct set_elem members[0]; /* the set members */
 };
 
-static inline struct set_elem *
-list_set_elem(const struct list_set *map, u32 id)
-{
-	return (struct set_elem *)((void *)map->members + id * map->dsize);
-}
-
-#define ext_timeout(e, m)	\
-(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
-#define ext_counter(e, m)	\
-(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER])
+#define list_set_elem(set, map, id)	\
+	(struct set_elem *)((void *)(map)->members + (id) * (set)->dsize)
 
 static int
 list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
@@ -92,16 +61,16 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
 	if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
 		opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		ret = ip_set_test(e->id, skb, par, opt);
 		if (ret > 0) {
 			if (SET_WITH_COUNTER(set))
-				ip_set_update_counter(ext_counter(e, map),
+				ip_set_update_counter(ext_counter(e, set),
 						      ext, &opt->ext,
 						      cmdflags);
 			return ret;
@@ -121,11 +90,11 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		ret = ip_set_add(e->id, skb, par, opt);
 		if (ret == 0)
@@ -145,11 +114,11 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		ret = ip_set_del(e->id, skb, par, opt);
 		if (ret == 0)
@@ -163,8 +132,7 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
 	      const struct xt_action_param *par,
 	      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	struct list_set *map = set->data;
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	switch (adt) {
 	case IPSET_TEST:
@@ -188,10 +156,10 @@ id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)
 	if (i >= map->size)
 		return 0;
 
-	e = list_set_elem(map, i);
+	e = list_set_elem(set, map, i);
 	return !!(e->id == id &&
 		 !(SET_WITH_TIMEOUT(set) &&
-		   ip_set_timeout_expired(ext_timeout(e, map))));
+		   ip_set_timeout_expired(ext_timeout(e, set))));
 }
 
 static int
@@ -199,28 +167,36 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
 	     const struct ip_set_ext *ext)
 {
 	struct list_set *map = set->data;
-	struct set_elem *e = list_set_elem(map, i);
+	struct set_elem *e = list_set_elem(set, map, i);
 
 	if (e->id != IPSET_INVALID_ID) {
-		if (i == map->size - 1)
+		if (i == map->size - 1) {
 			/* Last element replaced: e.g. add new,before,last */
-			ip_set_put_byindex(e->id);
-		else {
-			struct set_elem *x = list_set_elem(map, map->size - 1);
+			ip_set_put_byindex(map->net, e->id);
+			ip_set_ext_destroy(set, e);
+		} else {
+			struct set_elem *x = list_set_elem(set, map,
+							   map->size - 1);
 
 			/* Last element pushed off */
-			if (x->id != IPSET_INVALID_ID)
-				ip_set_put_byindex(x->id);
-			memmove(list_set_elem(map, i + 1), e,
-				map->dsize * (map->size - (i + 1)));
+			if (x->id != IPSET_INVALID_ID) {
+				ip_set_put_byindex(map->net, x->id);
+				ip_set_ext_destroy(set, x);
+			}
+			memmove(list_set_elem(set, map, i + 1), e,
+				set->dsize * (map->size - (i + 1)));
+			/* Extensions must be initialized to zero */
+			memset(e, 0, set->dsize);
 		}
 	}
 
 	e->id = d->id;
 	if (SET_WITH_TIMEOUT(set))
-		ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+		ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
 	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(e, map), ext);
+		ip_set_init_counter(ext_counter(e, set), ext);
+	if (SET_WITH_COMMENT(set))
+		ip_set_init_comment(ext_comment(e, set), ext);
 	return 0;
 }
 
@@ -228,16 +204,17 @@ static int
 list_set_del(struct ip_set *set, u32 i)
 {
 	struct list_set *map = set->data;
-	struct set_elem *e = list_set_elem(map, i);
+	struct set_elem *e = list_set_elem(set, map, i);
 
-	ip_set_put_byindex(e->id);
+	ip_set_put_byindex(map->net, e->id);
+	ip_set_ext_destroy(set, e);
 
 	if (i < map->size - 1)
-		memmove(e, list_set_elem(map, i + 1),
-			map->dsize * (map->size - (i + 1)));
+		memmove(e, list_set_elem(set, map, i + 1),
+			set->dsize * (map->size - (i + 1)));
 
 	/* Last element */
-	e = list_set_elem(map, map->size - 1);
+	e = list_set_elem(set, map, map->size - 1);
 	e->id = IPSET_INVALID_ID;
 	return 0;
 }
@@ -247,13 +224,16 @@ set_cleanup_entries(struct ip_set *set)
 {
 	struct list_set *map = set->data;
 	struct set_elem *e;
-	u32 i;
+	u32 i = 0;
 
-	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+	while (i < map->size) {
+		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			list_set_del(set, i);
+			/* Check element moved to position i in next loop */
+		else
+			i++;
 	}
 }
 
@@ -268,11 +248,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, map)))
+			 ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		else if (e->id != d->id)
 			continue;
@@ -299,14 +279,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	bool flag_exist = flags & IPSET_FLAG_EXIST;
 	u32 i, ret = 0;
 
+	if (SET_WITH_TIMEOUT(set))
+		set_cleanup_entries(set);
+
 	/* Check already added element */
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto insert;
-		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, map)))
-			continue;
 		else if (e->id != d->id)
 			continue;
 
@@ -319,18 +299,22 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 			/* Can't re-add */
 			return -IPSET_ERR_EXIST;
 		/* Update extensions */
+		ip_set_ext_destroy(set, e);
+
 		if (SET_WITH_TIMEOUT(set))
-			ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+			ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
 		if (SET_WITH_COUNTER(set))
-			ip_set_init_counter(ext_counter(e, map), ext);
+			ip_set_init_counter(ext_counter(e, set), ext);
+		if (SET_WITH_COMMENT(set))
+			ip_set_init_comment(ext_comment(e, set), ext);
 		/* Set is already added to the list */
-		ip_set_put_byindex(d->id);
+		ip_set_put_byindex(map->net, d->id);
 		return 0;
 	}
 insert:
 	ret = -IPSET_ERR_LIST_FULL;
 	for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
 				: list_set_add(set, i, d, ext);
@@ -355,12 +339,12 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return d->before != 0 ? -IPSET_ERR_REF_EXIST
 					      : -IPSET_ERR_EXIST;
 		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, map)))
+			 ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		else if (e->id != d->id)
 			continue;
@@ -386,7 +370,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct list_set *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct set_adt_elem e = { .refid = IPSET_INVALID_ID };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	struct ip_set *s;
 	int ret = 0;
 
@@ -403,7 +387,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
-	e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
+	e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s);
 	if (e.id == IPSET_INVALID_ID)
 		return -IPSET_ERR_NAME;
 	/* "Loop detection" */
@@ -423,7 +407,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (tb[IPSET_ATTR_NAMEREF]) {
-		e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
+		e.refid = ip_set_get_byname(map->net,
+					    nla_data(tb[IPSET_ATTR_NAMEREF]),
 					    &s);
 		if (e.refid == IPSET_INVALID_ID) {
 			ret = -IPSET_ERR_NAMEREF;
@@ -439,9 +424,9 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 
 finish:
 	if (e.refid != IPSET_INVALID_ID)
-		ip_set_put_byindex(e.refid);
+		ip_set_put_byindex(map->net, e.refid);
 	if (adt != IPSET_ADD || ret)
-		ip_set_put_byindex(e.id);
+		ip_set_put_byindex(map->net, e.id);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
 }
@@ -454,9 +439,10 @@ list_set_flush(struct ip_set *set)
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID) {
-			ip_set_put_byindex(e->id);
+			ip_set_put_byindex(map->net, e->id);
+			ip_set_ext_destroy(set, e);
 			e->id = IPSET_INVALID_ID;
 		}
 	}
@@ -485,14 +471,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 	if (!nested)
 		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
-	    (SET_WITH_TIMEOUT(set) &&
-	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
-	    (SET_WITH_COUNTER(set) &&
-	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
-			   htonl(IPSET_FLAG_WITH_COUNTERS))) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
-			  htonl(sizeof(*map) + map->size * map->dsize)))
+			  htonl(sizeof(*map) + map->size * set->dsize)))
+		goto nla_put_failure;
+	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
 	ipset_nest_end(skb, nested);
 
@@ -507,19 +490,20 @@ list_set_list(const struct ip_set *set,
 {
 	const struct list_set *map = set->data;
 	struct nlattr *atd, *nested;
-	u32 i, first = cb->args[2];
+	u32 i, first = cb->args[IPSET_CB_ARG0];
 	const struct set_elem *e;
 
 	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
 	if (!atd)
 		return -EMSGSIZE;
-	for (; cb->args[2] < map->size; cb->args[2]++) {
-		i = cb->args[2];
-		e = list_set_elem(map, i);
+	for (; cb->args[IPSET_CB_ARG0] < map->size;
+	     cb->args[IPSET_CB_ARG0]++) {
+		i = cb->args[IPSET_CB_ARG0];
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto finish;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested) {
@@ -530,31 +514,25 @@ list_set_list(const struct ip_set *set,
 				goto nla_put_failure;
 		}
 		if (nla_put_string(skb, IPSET_ATTR_NAME,
-				   ip_set_name_byindex(e->id)))
-			goto nla_put_failure;
-		if (SET_WITH_TIMEOUT(set) &&
-		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-				  htonl(ip_set_timeout_get(
-						ext_timeout(e, map)))))
+				   ip_set_name_byindex(map->net, e->id)))
 			goto nla_put_failure;
-		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(e, map)))
+		if (ip_set_put_extensions(skb, set, e, true))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
 finish:
 	ipset_nest_end(skb, atd);
 	/* Set listing finished */
-	cb->args[2] = 0;
+	cb->args[IPSET_CB_ARG0] = 0;
 	return 0;
 
 nla_put_failure:
 	nla_nest_cancel(skb, nested);
-	ipset_nest_end(skb, atd);
 	if (unlikely(i == first)) {
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 		return -EMSGSIZE;
 	}
+	ipset_nest_end(skb, atd);
 	return 0;
 }
 
@@ -565,7 +543,7 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
 	const struct list_set *y = b->data;
 
 	return x->size == y->size &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -594,7 +572,7 @@ list_set_gc(unsigned long ul_set)
 	set_cleanup_entries(set);
 	write_unlock_bh(&set->lock);
 
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
@@ -606,43 +584,40 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	init_timer(&map->gc);
 	map->gc.data = (unsigned long) set;
 	map->gc.function = gc;
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
 /* Create list:set type of sets */
 
-static struct list_set *
-init_list_set(struct ip_set *set, u32 size, size_t dsize,
-	      unsigned long timeout)
+static bool
+init_list_set(struct net *net, struct ip_set *set, u32 size)
 {
 	struct list_set *map;
 	struct set_elem *e;
 	u32 i;
 
-	map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);
+	map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL);
 	if (!map)
-		return NULL;
+		return false;
 
 	map->size = size;
-	map->dsize = dsize;
-	map->timeout = timeout;
+	map->net = net;
 	set->data = map;
 
 	for (i = 0; i < size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		e->id = IPSET_INVALID_ID;
 	}
 
-	return map;
+	return true;
 }
 
 static int
-list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+		u32 flags)
 {
-	struct list_set *map;
-	u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0;
-	unsigned long timeout = 0;
+	u32 size = IP_SET_LIST_DEFAULT_SIZE;
 
 	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
@@ -654,45 +629,13 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	if (size < IP_SET_LIST_MIN_SIZE)
 		size = IP_SET_LIST_MIN_SIZE;
 
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (tb[IPSET_ATTR_TIMEOUT])
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	set->variant = &set_variant;
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map = init_list_set(set, size,
-					sizeof(struct setct_elem), timeout);
-			if (!map)
-				return -ENOMEM;
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			map->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct setct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
-				offsetof(struct setct_elem, counter);
-			list_set_gc_init(set, list_set_gc);
-		} else {
-			map = init_list_set(set, size,
-					    sizeof(struct setc_elem), 0);
-			if (!map)
-				return -ENOMEM;
-			map->offset[IPSET_OFFSET_COUNTER] =
-				offsetof(struct setc_elem, counter);
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map = init_list_set(set, size,
-				    sizeof(struct sett_elem), timeout);
-		if (!map)
-			return -ENOMEM;
-		set->extensions |= IPSET_EXT_TIMEOUT;
-		map->offset[IPSET_OFFSET_TIMEOUT] =
-			offsetof(struct sett_elem, timeout);
+	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+	if (!init_list_set(net, set, size))
+		return -ENOMEM;
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		list_set_gc_init(set, list_set_gc);
-	} else {
-		map = init_list_set(set, size, sizeof(struct set_elem), 0);
-		if (!map)
-			return -ENOMEM;
 	}
 	return 0;
 }
@@ -703,8 +646,8 @@ static struct ip_set_type list_set_type __read_mostly = {
 	.features	= IPSET_TYPE_NAME | IPSET_DUMP_LAST,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= list_set_create,
 	.create_policy	= {
 		[IPSET_ATTR_SIZE]	= { .type = NLA_U32 },
@@ -721,6 +664,7 @@ static struct ip_set_type list_set_type __read_mostly = {
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
 		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
 	},
 	.me		= THIS_MODULE,
 };
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a083bda..4c8e5c0 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -975,8 +975,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 				return cp;
 			}
 		}
-		rcu_read_unlock();
-		rcu_read_lock();
+		cond_resched_rcu();
 	}
 
 	return NULL;
@@ -1015,8 +1014,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 			iter->l = &ip_vs_conn_tab[idx];
 			return cp;
 		}
-		rcu_read_unlock();
-		rcu_read_lock();
+		cond_resched_rcu();
 	}
 	iter->l = NULL;
 	return NULL;
@@ -1206,17 +1204,13 @@ void ip_vs_random_dropentry(struct net *net)
 	int idx;
 	struct ip_vs_conn *cp, *cp_c;
 
+	rcu_read_lock();
 	/*
 	 * Randomly scan 1/32 of the whole table every second
 	 */
 	for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
 		unsigned int hash = net_random() & ip_vs_conn_tab_mask;
 
-		/*
-		 *  Lock is actually needed in this loop.
-		 */
-		rcu_read_lock();
-
 		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
 			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
 				/* connection template */
@@ -1237,6 +1231,18 @@ void ip_vs_random_dropentry(struct net *net)
 				default:
 					continue;
 				}
+			} else if (cp->protocol == IPPROTO_SCTP) {
+				switch (cp->state) {
+				case IP_VS_SCTP_S_INIT1:
+				case IP_VS_SCTP_S_INIT:
+					break;
+				case IP_VS_SCTP_S_ESTABLISHED:
+					if (todrop_entry(cp))
+						break;
+					continue;
+				default:
+					continue;
+				}
 			} else {
 				if (!todrop_entry(cp))
 					continue;
@@ -1252,8 +1258,9 @@ void ip_vs_random_dropentry(struct net *net)
 				__ip_vs_conn_put(cp);
 			}
 		}
-		rcu_read_unlock();
+		cond_resched_rcu();
 	}
+	rcu_read_unlock();
 }
 
 
@@ -1267,11 +1274,8 @@ static void ip_vs_conn_flush(struct net *net)
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
 flush_again:
+	rcu_read_lock();
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
-		/*
-		 *  Lock is actually needed in this loop.
-		 */
-		rcu_read_lock();
 
 		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
 			if (!ip_vs_conn_net_eq(cp, net))
@@ -1286,8 +1290,9 @@ flush_again:
 				__ip_vs_conn_put(cp);
 			}
 		}
-		rcu_read_unlock();
+		cond_resched_rcu();
 	}
+	rcu_read_unlock();
 
 	/* the counter may be not NULL, because maybe some conn entries
 	   are run by slow timer handler or unhashed but still referred */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 23b8eb5..4f26ee4 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		struct ip_vs_cpu_stats *s;
+		struct ip_vs_service *svc;
 
 		s = this_cpu_ptr(dest->stats.cpustats);
 		s->ustats.inpkts++;
@@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		s->ustats.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
-		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		rcu_read_lock();
+		svc = rcu_dereference(dest->svc);
+		s = this_cpu_ptr(svc->stats.cpustats);
 		s->ustats.inpkts++;
 		u64_stats_update_begin(&s->syncp);
 		s->ustats.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
+		rcu_read_unlock();
 
 		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
 		s->ustats.inpkts++;
@@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		struct ip_vs_cpu_stats *s;
+		struct ip_vs_service *svc;
 
 		s = this_cpu_ptr(dest->stats.cpustats);
 		s->ustats.outpkts++;
@@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		s->ustats.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
-		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		rcu_read_lock();
+		svc = rcu_dereference(dest->svc);
+		s = this_cpu_ptr(svc->stats.cpustats);
 		s->ustats.outpkts++;
 		u64_stats_update_begin(&s->syncp);
 		s->ustats.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
+		rcu_read_unlock();
 
 		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
 		s->ustats.outpkts++;
@@ -305,7 +313,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		 * return *ignored=0 i.e. ICMP and NF_DROP
 		 */
 		sched = rcu_dereference(svc->scheduler);
-		dest = sched->schedule(svc, skb);
+		dest = sched->schedule(svc, skb, iph);
 		if (!dest) {
 			IP_VS_DBG(1, "p-schedule: no dest found.\n");
 			kfree(param.pe_data);
@@ -452,7 +460,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	}
 
 	sched = rcu_dereference(svc->scheduler);
-	dest = sched->schedule(svc, skb);
+	dest = sched->schedule(svc, skb, iph);
 	if (dest == NULL) {
 		IP_VS_DBG(1, "Schedule: no dest found.\n");
 		return NULL;
@@ -1131,12 +1139,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	ip_vs_fill_iph_skb(af, skb, &iph);
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (!iph.fragoffs && skb_nfct_reasm(skb)) {
-			struct sk_buff *reasm = skb_nfct_reasm(skb);
-			/* Save fw mark for coming frags */
-			reasm->ipvs_property = 1;
-			reasm->mark = skb->mark;
-		}
 		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
 			int related;
 			int verdict = ip_vs_out_icmp_v6(skb, &related,
@@ -1231,11 +1233,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	     const struct net_device *in, const struct net_device *out,
 	     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET);
+	return ip_vs_out(ops->hooknum, skb, AF_INET);
 }
 
 /*
@@ -1243,11 +1245,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET);
+	return ip_vs_out(ops->hooknum, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1258,11 +1260,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	     const struct net_device *in, const struct net_device *out,
 	     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET6);
+	return ip_vs_out(ops->hooknum, skb, AF_INET6);
 }
 
 /*
@@ -1270,11 +1272,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET6);
+	return ip_vs_out(ops->hooknum, skb, AF_INET6);
 }
 
 #endif
@@ -1606,12 +1608,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (!iph.fragoffs && skb_nfct_reasm(skb)) {
-			struct sk_buff *reasm = skb_nfct_reasm(skb);
-			/* Save fw mark for coming frags. */
-			reasm->ipvs_property = 1;
-			reasm->mark = skb->mark;
-		}
 		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
 			int related;
 			int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
@@ -1663,9 +1659,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 		/* sorry, all this trouble for a no-hit :) */
 		IP_VS_DBG_PKT(12, af, pp, skb, 0,
 			      "ip_vs_in: packet continues traversal as normal");
-		if (iph.fragoffs && !skb_nfct_reasm(skb)) {
+		if (iph.fragoffs) {
 			/* Fragment that couldn't be mapped to a conn entry
-			 * and don't have any pointer to a reasm skb
 			 * is missing module nf_defrag_ipv6
 			 */
 			IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
@@ -1725,12 +1720,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
  *	Schedule and forward packets from remote clients
  */
 static unsigned int
-ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in,
 		      const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET);
+	return ip_vs_in(ops->hooknum, skb, AF_INET);
 }
 
 /*
@@ -1738,58 +1733,26 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
  *	Schedule and forward packets from local clients
  */
 static unsigned int
-ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET);
+	return ip_vs_in(ops->hooknum, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
 
 /*
- * AF_INET6 fragment handling
- * Copy info from first fragment, to the rest of them.
- */
-static unsigned int
-ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     int (*okfn)(struct sk_buff *))
-{
-	struct sk_buff *reasm = skb_nfct_reasm(skb);
-	struct net *net;
-
-	/* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
-	 * ipvs_property is set when checking first fragment
-	 * in ip_vs_in() and ip_vs_out().
-	 */
-	if (reasm)
-		IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
-	if (!reasm || !reasm->ipvs_property)
-		return NF_ACCEPT;
-
-	net = skb_net(skb);
-	if (!net_ipvs(net)->enable)
-		return NF_ACCEPT;
-
-	/* Copy stored fw mark, saved in ip_vs_{in,out} */
-	skb->mark = reasm->mark;
-
-	return NF_ACCEPT;
-}
-
-/*
  *	AF_INET6 handler in NF_INET_LOCAL_IN chain
  *	Schedule and forward packets from remote clients
  */
 static unsigned int
-ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in,
 		      const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET6);
+	return ip_vs_in(ops->hooknum, skb, AF_INET6);
 }
 
 /*
@@ -1797,11 +1760,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
  *	Schedule and forward packets from local clients
  */
 static unsigned int
-ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET6);
+	return ip_vs_in(ops->hooknum, skb, AF_INET6);
 }
 
 #endif
@@ -1817,7 +1780,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
  *      and send them to ip_vs_in_icmp.
  */
 static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
@@ -1834,12 +1797,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
-	return ip_vs_in_icmp(skb, &r, hooknum);
+	return ip_vs_in_icmp(skb, &r, ops->hooknum);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
 static unsigned int
-ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in, const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
@@ -1858,7 +1821,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
 	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
-	return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
+	return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr);
 }
 #endif
 
@@ -1916,14 +1879,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 		.priority	= 100,
 	},
 #ifdef CONFIG_IP_VS_IPV6
-	/* After mangle & nat fetch 2:nd fragment and following */
-	{
-		.hook		= ip_vs_preroute_frag6,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP6_PRI_NAT_DST + 1,
-	},
 	/* After packet filtering, change source only for VS/NAT */
 	{
 		.hook		= ip_vs_reply6,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 9e6c2a0..35be035 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -460,7 +460,7 @@ static inline void
 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 {
 	atomic_inc(&svc->refcnt);
-	dest->svc = svc;
+	rcu_assign_pointer(dest->svc, svc);
 }
 
 static void ip_vs_service_free(struct ip_vs_service *svc)
@@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc)
 	kfree(svc);
 }
 
-static void
-__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+static void ip_vs_service_rcu_free(struct rcu_head *head)
 {
-	struct ip_vs_service *svc = dest->svc;
+	struct ip_vs_service *svc;
 
-	dest->svc = NULL;
+	svc = container_of(head, struct ip_vs_service, rcu_head);
+	ip_vs_service_free(svc);
+}
+
+static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
+{
 	if (atomic_dec_and_test(&svc->refcnt)) {
 		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
 			      svc->fwmark,
 			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
 			      ntohs(svc->port));
-		ip_vs_service_free(svc);
+		if (do_delay)
+			call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
+		else
+			ip_vs_service_free(svc);
 	}
 }
 
@@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 			      ntohs(dest->port),
 			      atomic_read(&dest->refcnt));
-		/* We can not reuse dest while in grace period
-		 * because conns still can use dest->svc
-		 */
-		if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
-			continue;
 		if (dest->af == svc->af &&
 		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
 		    dest->port == dport &&
@@ -697,10 +699,12 @@ out:
 
 static void ip_vs_dest_free(struct ip_vs_dest *dest)
 {
+	struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
+
 	__ip_vs_dst_cache_reset(dest);
-	__ip_vs_unbind_svc(dest);
+	__ip_vs_svc_put(svc, false);
 	free_percpu(dest->stats.cpustats);
-	kfree(dest);
+	ip_vs_dest_put_and_free(dest);
 }
 
 /*
@@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 		    struct ip_vs_dest_user_kern *udest, int add)
 {
 	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	struct ip_vs_service *old_svc;
 	struct ip_vs_scheduler *sched;
 	int conn_flags;
 
@@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	atomic_set(&dest->conn_flags, conn_flags);
 
 	/* bind the service */
-	if (!dest->svc) {
+	old_svc = rcu_dereference_protected(dest->svc, 1);
+	if (!old_svc) {
 		__ip_vs_bind_svc(dest, svc);
 	} else {
-		if (dest->svc != svc) {
-			__ip_vs_unbind_svc(dest);
+		if (old_svc != svc) {
 			ip_vs_zero_stats(&dest->stats);
 			__ip_vs_bind_svc(dest, svc);
+			__ip_vs_svc_put(old_svc, true);
 		}
 	}
 
@@ -836,7 +842,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 	       struct ip_vs_dest **dest_p)
 {
 	struct ip_vs_dest *dest;
-	unsigned int atype;
+	unsigned int atype, i;
 
 	EnterFunction(2);
 
@@ -863,6 +869,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 	if (!dest->stats.cpustats)
 		goto err_alloc;
 
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *ip_vs_dest_stats;
+		ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
+		u64_stats_init(&ip_vs_dest_stats->syncp);
+	}
+
 	dest->af = svc->af;
 	dest->protocol = svc->protocol;
 	dest->vaddr = svc->addr;
@@ -998,16 +1010,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	return 0;
 }
 
-static void ip_vs_dest_wait_readers(struct rcu_head *head)
-{
-	struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
-					       rcu_head);
-
-	/* End of grace period after unlinking */
-	clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
-}
-
-
 /*
  *	Delete a destination (must be already unlinked from the service)
  */
@@ -1023,20 +1025,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
 	 */
 	ip_vs_rs_unhash(dest);
 
-	if (!cleanup) {
-		set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
-		call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
-	}
-
 	spin_lock_bh(&ipvs->dest_trash_lock);
 	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
 		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
 		      atomic_read(&dest->refcnt));
 	if (list_empty(&ipvs->dest_trash) && !cleanup)
 		mod_timer(&ipvs->dest_trash_timer,
-			  jiffies + IP_VS_DEST_TRASH_PERIOD);
+			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
 	/* dest lives in trash without reference */
 	list_add(&dest->t_list, &ipvs->dest_trash);
+	dest->idle_start = 0;
 	spin_unlock_bh(&ipvs->dest_trash_lock);
 	ip_vs_dest_put(dest);
 }
@@ -1108,24 +1106,30 @@ static void ip_vs_dest_trash_expire(unsigned long data)
 	struct net *net = (struct net *) data;
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_dest *dest, *next;
+	unsigned long now = jiffies;
 
 	spin_lock(&ipvs->dest_trash_lock);
 	list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
-		/* Skip if dest is in grace period */
-		if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
-			continue;
 		if (atomic_read(&dest->refcnt) > 0)
 			continue;
+		if (dest->idle_start) {
+			if (time_before(now, dest->idle_start +
+					     IP_VS_DEST_TRASH_PERIOD))
+				continue;
+		} else {
+			dest->idle_start = max(1UL, now);
+			continue;
+		}
 		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
 			      dest->vfwmark,
-			      IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
+			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
 			      ntohs(dest->port));
 		list_del(&dest->t_list);
 		ip_vs_dest_free(dest);
 	}
 	if (!list_empty(&ipvs->dest_trash))
 		mod_timer(&ipvs->dest_trash_timer,
-			  jiffies + IP_VS_DEST_TRASH_PERIOD);
+			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
 	spin_unlock(&ipvs->dest_trash_lock);
 }
 
@@ -1136,7 +1140,7 @@ static int
 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 		  struct ip_vs_service **svc_p)
 {
-	int ret = 0;
+	int ret = 0, i;
 	struct ip_vs_scheduler *sched = NULL;
 	struct ip_vs_pe *pe = NULL;
 	struct ip_vs_service *svc = NULL;
@@ -1186,6 +1190,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 		goto out_err;
 	}
 
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *ip_vs_stats;
+		ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
+		u64_stats_init(&ip_vs_stats->syncp);
+	}
+
+
 	/* I'm the first user of the service */
 	atomic_set(&svc->refcnt, 0);
 
@@ -1320,14 +1331,6 @@ out:
 	return ret;
 }
 
-static void ip_vs_service_rcu_free(struct rcu_head *head)
-{
-	struct ip_vs_service *svc;
-
-	svc = container_of(head, struct ip_vs_service, rcu_head);
-	ip_vs_service_free(svc);
-}
-
 /*
  *	Delete a service from the service list
  *	- The service must be unlinked, unlocked and not referenced!
@@ -1376,13 +1379,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
 	/*
 	 *    Free the service if nobody refers to it
 	 */
-	if (atomic_dec_and_test(&svc->refcnt)) {
-		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
-			      svc->fwmark,
-			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
-			      ntohs(svc->port));
-		call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
-	}
+	__ip_vs_svc_put(svc, true);
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
@@ -1487,9 +1484,9 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
  * Currently only NETDEV_DOWN is handled to release refs to cached dsts
  */
 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
-			    void *ptr)
+			   void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct net *net = dev_net(dev);
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_service *svc;
@@ -1575,7 +1572,7 @@ static int zero;
 static int three = 3;
 
 static int
-proc_do_defense_mode(ctl_table *table, int write,
+proc_do_defense_mode(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = current->nsproxy->net_ns;
@@ -1596,7 +1593,7 @@ proc_do_defense_mode(ctl_table *table, int write,
 }
 
 static int
-proc_do_sync_threshold(ctl_table *table, int write,
+proc_do_sync_threshold(struct ctl_table *table, int write,
 		       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = table->data;
@@ -1616,7 +1613,7 @@ proc_do_sync_threshold(ctl_table *table, int write,
 }
 
 static int
-proc_do_sync_mode(ctl_table *table, int write,
+proc_do_sync_mode(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = table->data;
@@ -1634,7 +1631,7 @@ proc_do_sync_mode(ctl_table *table, int write,
 }
 
 static int
-proc_do_sync_ports(ctl_table *table, int write,
+proc_do_sync_ports(struct ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = table->data;
@@ -1715,12 +1712,18 @@ static struct ctl_table vs_vars[] = {
 		.proc_handler	= &proc_do_sync_ports,
 	},
 	{
-		.procname	= "sync_qlen_max",
+		.procname	= "sync_persist_mode",
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
+		.procname	= "sync_qlen_max",
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
 		.procname	= "sync_sock_size",
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
@@ -1739,6 +1742,18 @@ static struct ctl_table vs_vars[] = {
 		.proc_handler	= proc_dointvec,
 	},
 	{
+		.procname	= "sloppy_tcp",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sloppy_sctp",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "expire_quiescent_template",
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
@@ -3565,7 +3580,7 @@ out:
 }
 
 
-static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+static const struct genl_ops ip_vs_genl_ops[] __read_mostly = {
 	{
 		.cmd	= IPVS_CMD_NEW_SERVICE,
 		.flags	= GENL_ADMIN_PERM,
@@ -3664,7 +3679,7 @@ static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
 static int __init ip_vs_genl_register(void)
 {
 	return genl_register_family_with_ops(&ip_vs_genl_family,
-		ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
+					     ip_vs_genl_ops);
 }
 
 static void ip_vs_genl_unregister(void)
@@ -3717,12 +3732,15 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 	tbl[idx++].data = &ipvs->sysctl_sync_ver;
 	ipvs->sysctl_sync_ports = 1;
 	tbl[idx++].data = &ipvs->sysctl_sync_ports;
+	tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
 	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
 	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
 	ipvs->sysctl_sync_sock_size = 0;
 	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
 	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
 	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+	tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
+	tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
 	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
 	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
 	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
@@ -3775,7 +3793,7 @@ static struct notifier_block ip_vs_dst_notifier = {
 
 int __net_init ip_vs_control_net_init(struct net *net)
 {
-	int idx;
+	int i, idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	/* Initialize rs_table */
@@ -3794,6 +3812,12 @@ int __net_init ip_vs_control_net_init(struct net *net)
 	if (!ipvs->tot_stats.cpustats)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *ipvs_tot_stats;
+		ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
+		u64_stats_init(&ipvs_tot_stats->syncp);
+	}
+
 	spin_lock_init(&ipvs->tot_stats.lock);
 
 	proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
@@ -3815,10 +3839,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	/* Some dest can be in grace period even before cleanup, we have to
-	 * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called.
-	 */
-	rcu_barrier();
 	ip_vs_trash_cleanup(net);
 	ip_vs_stop_estimator(net, &ipvs->tot_stats);
 	ip_vs_control_net_cleanup_sysctl(net);
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index ccab120..c3b8454 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -214,18 +214,16 @@ static inline int is_overloaded(struct ip_vs_dest *dest)
  *      Destination hashing scheduling
  */
 static struct ip_vs_dest *
-ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		  struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_dh_state *s;
-	struct ip_vs_iphdr iph;
-
-	ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
 
 	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
 	s = (struct ip_vs_dh_state *) svc->sched_data;
-	dest = ip_vs_dh_get(svc->af, s, &iph.daddr);
+	dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
 	if (!dest
 	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
 	    || atomic_read(&dest->weight) <= 0
@@ -235,7 +233,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	}
 
 	IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph.daddr),
+		      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
 		      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 		      ntohs(dest->port));
 
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 6bee6d0..1425e9a 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
 				 struct ip_vs_cpu_stats __percpu *stats)
 {
 	int i;
+	bool add = false;
 
 	for_each_possible_cpu(i) {
 		struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
 		unsigned int start;
 		__u64 inbytes, outbytes;
-		if (i) {
+		if (add) {
 			sum->conns += s->ustats.conns;
 			sum->inpkts += s->ustats.inpkts;
 			sum->outpkts += s->ustats.outpkts;
@@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
 			sum->inbytes += inbytes;
 			sum->outbytes += outbytes;
 		} else {
+			add = true;
 			sum->conns = s->ustats.conns;
 			sum->inpkts = s->ustats.inpkts;
 			sum->outpkts = s->ustats.outpkts;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 5ea26bd..ca056a3 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
 	struct hlist_node	list;
 	int			af;		/* address family */
 	union nf_inet_addr      addr;           /* destination IP address */
-	struct ip_vs_dest __rcu	*dest;          /* real server (cache) */
+	struct ip_vs_dest	*dest;          /* real server (cache) */
 	unsigned long           lastuse;        /* last used time */
 	struct rcu_head		rcu_head;
 };
@@ -118,7 +118,7 @@ struct ip_vs_lblc_table {
  *      IPVS LBLC sysctl table
  */
 #ifdef CONFIG_SYSCTL
-static ctl_table vs_vars_table[] = {
+static struct ctl_table vs_vars_table[] = {
 	{
 		.procname	= "lblc_expiration",
 		.data		= NULL,
@@ -130,20 +130,21 @@ static ctl_table vs_vars_table[] = {
 };
 #endif
 
-static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+static void ip_vs_lblc_rcu_free(struct rcu_head *head)
 {
-	struct ip_vs_dest *dest;
+	struct ip_vs_lblc_entry *en = container_of(head,
+						   struct ip_vs_lblc_entry,
+						   rcu_head);
 
-	hlist_del_rcu(&en->list);
-	/*
-	 * We don't kfree dest because it is referred either by its service
-	 * or the trash dest list.
-	 */
-	dest = rcu_dereference_protected(en->dest, 1);
-	ip_vs_dest_put(dest);
-	kfree_rcu(en, rcu_head);
+	ip_vs_dest_put_and_free(en->dest);
+	kfree(en);
 }
 
+static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
+{
+	hlist_del_rcu(&en->list);
+	call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
+}
 
 /*
  *	Returns hash value for IPVS LBLC entry
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
 	struct ip_vs_lblc_entry *en;
 
 	en = ip_vs_lblc_get(dest->af, tbl, daddr);
-	if (!en) {
-		en = kmalloc(sizeof(*en), GFP_ATOMIC);
-		if (!en)
-			return NULL;
-
-		en->af = dest->af;
-		ip_vs_addr_copy(dest->af, &en->addr, daddr);
-		en->lastuse = jiffies;
+	if (en) {
+		if (en->dest == dest)
+			return en;
+		ip_vs_lblc_del(en);
+	}
+	en = kmalloc(sizeof(*en), GFP_ATOMIC);
+	if (!en)
+		return NULL;
 
-		ip_vs_dest_hold(dest);
-		RCU_INIT_POINTER(en->dest, dest);
+	en->af = dest->af;
+	ip_vs_addr_copy(dest->af, &en->addr, daddr);
+	en->lastuse = jiffies;
 
-		ip_vs_lblc_hash(tbl, en);
-	} else {
-		struct ip_vs_dest *old_dest;
+	ip_vs_dest_hold(dest);
+	en->dest = dest;
 
-		old_dest = rcu_dereference_protected(en->dest, 1);
-		if (old_dest != dest) {
-			ip_vs_dest_put(old_dest);
-			ip_vs_dest_hold(dest);
-			/* No ordering constraints for refcnt */
-			RCU_INIT_POINTER(en->dest, dest);
-		}
-	}
+	ip_vs_lblc_hash(tbl, en);
 
 	return en;
 }
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
 	tbl->dead = 1;
 	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
-			ip_vs_lblc_free(en);
+			ip_vs_lblc_del(en);
 			atomic_dec(&tbl->entries);
 		}
 	}
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 					sysctl_lblc_expiration(svc)))
 				continue;
 
-			ip_vs_lblc_free(en);
+			ip_vs_lblc_del(en);
 			atomic_dec(&tbl->entries);
 		}
 		spin_unlock(&svc->sched_lock);
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
 			if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
 				continue;
 
-			ip_vs_lblc_free(en);
+			ip_vs_lblc_del(en);
 			atomic_dec(&tbl->entries);
 			goal--;
 		}
@@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
 			continue;
 
 		doh = ip_vs_dest_conn_overhead(dest);
-		if (loh * atomic_read(&dest->weight) >
-		    doh * atomic_read(&least->weight)) {
+		if ((__s64)loh * atomic_read(&dest->weight) >
+		    (__s64)doh * atomic_read(&least->weight)) {
 			least = dest;
 			loh = doh;
 		}
@@ -487,19 +481,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
  *    Locality-Based (weighted) Least-Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		    struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_lblc_table *tbl = svc->sched_data;
-	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest = NULL;
 	struct ip_vs_lblc_entry *en;
 
-	ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
-
 	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
 	/* First look in our cache */
-	en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr);
+	en = ip_vs_lblc_get(svc->af, tbl, &iph->daddr);
 	if (en) {
 		/* We only hold a read lock, but this is atomic */
 		en->lastuse = jiffies;
@@ -513,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		 * free up entries from the trash at any time.
 		 */
 
-		dest = rcu_dereference(en->dest);
+		dest = en->dest;
 		if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
 		    atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
 			goto out;
@@ -529,12 +521,12 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	/* If we fail to create a cache entry, we'll just use the valid dest */
 	spin_lock_bh(&svc->sched_lock);
 	if (!tbl->dead)
-		ip_vs_lblc_new(tbl, &iph.daddr, dest);
+		ip_vs_lblc_new(tbl, &iph->daddr, dest);
 	spin_unlock_bh(&svc->sched_lock);
 
 out:
 	IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph.daddr),
+		      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
 		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
 
 	return dest;
@@ -633,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
 {
 	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
 	unregister_pernet_subsys(&ip_vs_lblc_ops);
-	synchronize_rcu();
+	rcu_barrier();
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 50123c2..3f21a2f 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,7 +89,7 @@
  */
 struct ip_vs_dest_set_elem {
 	struct list_head	list;          /* list link */
-	struct ip_vs_dest __rcu *dest;         /* destination server */
+	struct ip_vs_dest	*dest;		/* destination server */
 	struct rcu_head		rcu_head;
 };
 
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
 
 	if (check) {
 		list_for_each_entry(e, &set->list, list) {
-			struct ip_vs_dest *d;
-
-			d = rcu_dereference_protected(e->dest, 1);
-			if (d == dest)
-				/* already existed */
+			if (e->dest == dest)
 				return;
 		}
 	}
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
 		return;
 
 	ip_vs_dest_hold(dest);
-	RCU_INIT_POINTER(e->dest, dest);
+	e->dest = dest;
 
 	list_add_rcu(&e->list, &set->list);
 	atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
 	set->lastmod = jiffies;
 }
 
+static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
+{
+	struct ip_vs_dest_set_elem *e;
+
+	e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
+	ip_vs_dest_put_and_free(e->dest);
+	kfree(e);
+}
+
 static void
 ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 {
 	struct ip_vs_dest_set_elem *e;
 
 	list_for_each_entry(e, &set->list, list) {
-		struct ip_vs_dest *d;
-
-		d = rcu_dereference_protected(e->dest, 1);
-		if (d == dest) {
+		if (e->dest == dest) {
 			/* HIT */
 			atomic_dec(&set->size);
 			set->lastmod = jiffies;
-			ip_vs_dest_put(dest);
 			list_del_rcu(&e->list);
-			kfree_rcu(e, rcu_head);
+			call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
 			break;
 		}
 	}
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
 	struct ip_vs_dest_set_elem *e, *ep;
 
 	list_for_each_entry_safe(e, ep, &set->list, list) {
-		struct ip_vs_dest *d;
-
-		d = rcu_dereference_protected(e->dest, 1);
-		/*
-		 * We don't kfree dest because it is referred either
-		 * by its service or by the trash dest list.
-		 */
-		ip_vs_dest_put(d);
 		list_del_rcu(&e->list);
-		kfree_rcu(e, rcu_head);
+		call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
 	}
 }
 
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 	struct ip_vs_dest *dest, *least;
 	int loh, doh;
 
-	if (set == NULL)
-		return NULL;
-
 	/* select the first destination server, whose weight > 0 */
 	list_for_each_entry_rcu(e, &set->list, list) {
-		least = rcu_dereference(e->dest);
+		least = e->dest;
 		if (least->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
@@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 	/* find the destination with the weighted least load */
   nextstage:
 	list_for_each_entry_continue_rcu(e, &set->list, list) {
-		dest = rcu_dereference(e->dest);
+		dest = e->dest;
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
 		doh = ip_vs_dest_conn_overhead(dest);
-		if ((loh * atomic_read(&dest->weight) >
-		     doh * atomic_read(&least->weight))
+		if (((__s64)loh * atomic_read(&dest->weight) >
+		     (__s64)doh * atomic_read(&least->weight))
 		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 			least = dest;
 			loh = doh;
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 
 	/* select the first destination server, whose weight > 0 */
 	list_for_each_entry(e, &set->list, list) {
-		most = rcu_dereference_protected(e->dest, 1);
+		most = e->dest;
 		if (atomic_read(&most->weight) > 0) {
 			moh = ip_vs_dest_conn_overhead(most);
 			goto nextstage;
@@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 	/* find the destination with the weighted most load */
   nextstage:
 	list_for_each_entry_continue(e, &set->list, list) {
-		dest = rcu_dereference_protected(e->dest, 1);
+		dest = e->dest;
 		doh = ip_vs_dest_conn_overhead(dest);
 		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
-		if ((moh * atomic_read(&dest->weight) <
-		     doh * atomic_read(&most->weight))
+		if (((__s64)moh * atomic_read(&dest->weight) <
+		     (__s64)doh * atomic_read(&most->weight))
 		    && (atomic_read(&dest->weight) > 0)) {
 			most = dest;
 			moh = doh;
@@ -299,7 +289,7 @@ struct ip_vs_lblcr_table {
  *      IPVS LBLCR sysctl table
  */
 
-static ctl_table vs_vars_table[] = {
+static struct ctl_table vs_vars_table[] = {
 	{
 		.procname	= "lblcr_expiration",
 		.data		= NULL,
@@ -414,7 +404,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
 
 	spin_lock_bh(&svc->sched_lock);
 	tbl->dead = 1;
-	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+	for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
 		hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
 			ip_vs_lblcr_free(en);
 		}
@@ -440,7 +430,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
 	struct ip_vs_lblcr_entry *en;
 	struct hlist_node *next;
 
-	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+	for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
 
 		spin_lock(&svc->sched_lock);
@@ -495,7 +485,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
 	if (goal > tbl->max_size/2)
 		goal = tbl->max_size/2;
 
-	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+	for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
 
 		spin_lock(&svc->sched_lock);
@@ -536,7 +526,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Initialize the hash buckets
 	 */
-	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+	for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
 		INIT_HLIST_HEAD(&tbl->bucket[i]);
 	}
 	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
@@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
 			continue;
 
 		doh = ip_vs_dest_conn_overhead(dest);
-		if (loh * atomic_read(&dest->weight) >
-		    doh * atomic_read(&least->weight)) {
+		if ((__s64)loh * atomic_read(&dest->weight) >
+		    (__s64)doh * atomic_read(&least->weight)) {
 			least = dest;
 			loh = doh;
 		}
@@ -655,19 +645,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
  *    Locality-Based (weighted) Least-Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		     struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_lblcr_table *tbl = svc->sched_data;
-	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
 	struct ip_vs_lblcr_entry *en;
 
-	ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
-
 	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
 	/* First look in our cache */
-	en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
+	en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr);
 	if (en) {
 		en->lastuse = jiffies;
 
@@ -718,12 +706,12 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	/* If we fail to create a cache entry, we'll just use the valid dest */
 	spin_lock_bh(&svc->sched_lock);
 	if (!tbl->dead)
-		ip_vs_lblcr_new(tbl, &iph.daddr, dest);
+		ip_vs_lblcr_new(tbl, &iph->daddr, dest);
 	spin_unlock_bh(&svc->sched_lock);
 
 out:
 	IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph.daddr),
+		      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
 		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
 
 	return dest;
@@ -821,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
 {
 	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
 	unregister_pernet_subsys(&ip_vs_lblcr_ops);
-	synchronize_rcu();
+	rcu_barrier();
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 5128e33..2bdcb1c 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -26,7 +26,8 @@
  *	Least Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		  struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_dest *dest, *least = NULL;
 	unsigned int loh = 0, doh;
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 646cfd4..961a6de 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -40,7 +40,7 @@
 #include <net/ip_vs.h>
 
 
-static inline unsigned int
+static inline int
 ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
 {
 	/*
@@ -55,10 +55,11 @@ ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
  *	Weighted Least Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		  struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_dest *dest, *least = NULL;
-	unsigned int loh = 0, doh;
+	int loh = 0, doh;
 
 	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
@@ -91,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		}
 
 		if (!least ||
-		    (loh * atomic_read(&dest->weight) >
-		     doh * atomic_read(&least->weight))) {
+		    ((__s64)loh * atomic_read(&dest->weight) >
+		     (__s64)doh * atomic_read(&least->weight))) {
 			least = dest;
 			loh = doh;
 		}
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 9ef22bd..bed5f70 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff,
 static int
 ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 {
-	struct sk_buff *reasm = skb_nfct_reasm(skb);
 	struct ip_vs_iphdr iph;
 	unsigned int dataoff, datalen, matchoff, matchlen;
 	const char *dptr;
@@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	/* todo: IPv6 fragments:
 	 *       I think this only should be done for the first fragment. /HS
 	 */
-	if (reasm) {
-		skb = reasm;
-		dataoff = iph.thoff_reasm + sizeof(struct udphdr);
-	} else
-		dataoff = iph.len + sizeof(struct udphdr);
+	dataoff = iph.len + sizeof(struct udphdr);
 
 	if (dataoff >= skb->len)
 		return -EINVAL;
-	/* todo: Check if this will mess-up the reasm skb !!! /HS */
 	retc = skb_linearize(skb);
 	if (retc < 0)
 		return retc;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 8646488..2f7ea75 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -15,25 +15,32 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 {
 	struct net *net;
 	struct ip_vs_service *svc;
+	struct netns_ipvs *ipvs;
 	sctp_chunkhdr_t _schunkh, *sch;
 	sctp_sctphdr_t *sh, _sctph;
 
 	sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
-	if (sh == NULL)
+	if (sh == NULL) {
+		*verdict = NF_DROP;
 		return 0;
+	}
 
 	sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
 				 sizeof(_schunkh), &_schunkh);
-	if (sch == NULL)
+	if (sch == NULL) {
+		*verdict = NF_DROP;
 		return 0;
+	}
+
 	net = skb_net(skb);
+	ipvs = net_ipvs(net);
 	rcu_read_lock();
-	if ((sch->type == SCTP_CID_INIT) &&
+	if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
 	    (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
 				      &iph->daddr, sh->dest))) {
 		int ignored;
 
-		if (ip_vs_todrop(net_ipvs(net))) {
+		if (ip_vs_todrop(ipvs)) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -64,15 +71,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
 			  unsigned int sctphoff)
 {
-	__u32 crc32;
-	struct sk_buff *iter;
-
-	crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff);
-	skb_walk_frags(skb, iter)
-		crc32 = sctp_update_cksum((u8 *) iter->data,
-					  skb_headlen(iter), crc32);
-	sctph->checksum = sctp_end_cksum(crc32);
-
+	sctph->checksum = sctp_compute_cksum(skb, sctphoff);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 }
 
@@ -82,6 +81,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 {
 	sctp_sctphdr_t *sctph;
 	unsigned int sctphoff = iph->len;
+	bool payload_csum = false;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -93,19 +93,31 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
+		int ret;
+
 		/* Some checks before mangling */
 		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
-		if (!ip_vs_app_pkt_out(cp, skb))
+		ret = ip_vs_app_pkt_out(cp, skb);
+		if (ret == 0)
 			return 0;
+		/* ret=2: csum update is needed after payload mangling */
+		if (ret == 2)
+			payload_csum = true;
 	}
 
 	sctph = (void *) skb_network_header(skb) + sctphoff;
-	sctph->source = cp->vport;
 
-	sctp_nat_csum(skb, sctph, sctphoff);
+	/* Only update csum if we really have to */
+	if (sctph->source != cp->vport || payload_csum ||
+	    skb->ip_summed == CHECKSUM_PARTIAL) {
+		sctph->source = cp->vport;
+		sctp_nat_csum(skb, sctph, sctphoff);
+	} else {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
 
 	return 1;
 }
@@ -116,6 +128,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 {
 	sctp_sctphdr_t *sctph;
 	unsigned int sctphoff = iph->len;
+	bool payload_csum = false;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -127,19 +140,32 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
+		int ret;
+
 		/* Some checks before mangling */
 		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
-		if (!ip_vs_app_pkt_in(cp, skb))
+		ret = ip_vs_app_pkt_in(cp, skb);
+		if (ret == 0)
 			return 0;
+		/* ret=2: csum update is needed after payload mangling */
+		if (ret == 2)
+			payload_csum = true;
 	}
 
 	sctph = (void *) skb_network_header(skb) + sctphoff;
-	sctph->dest = cp->dport;
 
-	sctp_nat_csum(skb, sctph, sctphoff);
+	/* Only update csum if we really have to */
+	if (sctph->dest != cp->dport || payload_csum ||
+	    (skb->ip_summed == CHECKSUM_PARTIAL &&
+	     !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
+		sctph->dest = cp->dport;
+		sctp_nat_csum(skb, sctph, sctphoff);
+	} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
 
 	return 1;
 }
@@ -149,10 +175,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
 	unsigned int sctphoff;
 	struct sctphdr *sh, _sctph;
-	struct sk_buff *iter;
-	__le32 cmp;
-	__le32 val;
-	__u32 tmp;
+	__le32 cmp, val;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
@@ -166,13 +189,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 		return 0;
 
 	cmp = sh->checksum;
-
-	tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb));
-	skb_walk_frags(skb, iter)
-		tmp = sctp_update_cksum((__u8 *) iter->data,
-					skb_headlen(iter), tmp);
-
-	val = sctp_end_cksum(tmp);
+	val = sctp_compute_cksum(skb, sctphoff);
 
 	if (val != cmp) {
 		/* CRC failure, dump it. */
@@ -183,710 +200,159 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 	return 1;
 }
 
-struct ipvs_sctp_nextstate {
-	int next_state;
-};
 enum ipvs_sctp_event_t {
-	IP_VS_SCTP_EVE_DATA_CLI,
-	IP_VS_SCTP_EVE_DATA_SER,
-	IP_VS_SCTP_EVE_INIT_CLI,
-	IP_VS_SCTP_EVE_INIT_SER,
-	IP_VS_SCTP_EVE_INIT_ACK_CLI,
-	IP_VS_SCTP_EVE_INIT_ACK_SER,
-	IP_VS_SCTP_EVE_COOKIE_ECHO_CLI,
-	IP_VS_SCTP_EVE_COOKIE_ECHO_SER,
-	IP_VS_SCTP_EVE_COOKIE_ACK_CLI,
-	IP_VS_SCTP_EVE_COOKIE_ACK_SER,
-	IP_VS_SCTP_EVE_ABORT_CLI,
-	IP_VS_SCTP_EVE__ABORT_SER,
-	IP_VS_SCTP_EVE_SHUT_CLI,
-	IP_VS_SCTP_EVE_SHUT_SER,
-	IP_VS_SCTP_EVE_SHUT_ACK_CLI,
-	IP_VS_SCTP_EVE_SHUT_ACK_SER,
-	IP_VS_SCTP_EVE_SHUT_COM_CLI,
-	IP_VS_SCTP_EVE_SHUT_COM_SER,
-	IP_VS_SCTP_EVE_LAST
+	IP_VS_SCTP_DATA = 0,		/* DATA, SACK, HEARTBEATs */
+	IP_VS_SCTP_INIT,
+	IP_VS_SCTP_INIT_ACK,
+	IP_VS_SCTP_COOKIE_ECHO,
+	IP_VS_SCTP_COOKIE_ACK,
+	IP_VS_SCTP_SHUTDOWN,
+	IP_VS_SCTP_SHUTDOWN_ACK,
+	IP_VS_SCTP_SHUTDOWN_COMPLETE,
+	IP_VS_SCTP_ERROR,
+	IP_VS_SCTP_ABORT,
+	IP_VS_SCTP_EVENT_LAST
 };
 
-static enum ipvs_sctp_event_t sctp_events[256] = {
-	IP_VS_SCTP_EVE_DATA_CLI,
-	IP_VS_SCTP_EVE_INIT_CLI,
-	IP_VS_SCTP_EVE_INIT_ACK_CLI,
-	IP_VS_SCTP_EVE_DATA_CLI,
-	IP_VS_SCTP_EVE_DATA_CLI,
-	IP_VS_SCTP_EVE_DATA_CLI,
-	IP_VS_SCTP_EVE_ABORT_CLI,
-	IP_VS_SCTP_EVE_SHUT_CLI,
-	IP_VS_SCTP_EVE_SHUT_ACK_CLI,
-	IP_VS_SCTP_EVE_DATA_CLI,
-	IP_VS_SCTP_EVE_COOKIE_ECHO_CLI,
-	IP_VS_SCTP_EVE_COOKIE_ACK_CLI,
-	IP_VS_SCTP_EVE_DATA_CLI,
-	IP_VS_SCTP_EVE_DATA_CLI,
-	IP_VS_SCTP_EVE_SHUT_COM_CLI,
+/* RFC 2960, 3.2 Chunk Field Descriptions */
+static __u8 sctp_events[] = {
+	[SCTP_CID_DATA]			= IP_VS_SCTP_DATA,
+	[SCTP_CID_INIT]			= IP_VS_SCTP_INIT,
+	[SCTP_CID_INIT_ACK]		= IP_VS_SCTP_INIT_ACK,
+	[SCTP_CID_SACK]			= IP_VS_SCTP_DATA,
+	[SCTP_CID_HEARTBEAT]		= IP_VS_SCTP_DATA,
+	[SCTP_CID_HEARTBEAT_ACK]	= IP_VS_SCTP_DATA,
+	[SCTP_CID_ABORT]		= IP_VS_SCTP_ABORT,
+	[SCTP_CID_SHUTDOWN]		= IP_VS_SCTP_SHUTDOWN,
+	[SCTP_CID_SHUTDOWN_ACK]		= IP_VS_SCTP_SHUTDOWN_ACK,
+	[SCTP_CID_ERROR]		= IP_VS_SCTP_ERROR,
+	[SCTP_CID_COOKIE_ECHO]		= IP_VS_SCTP_COOKIE_ECHO,
+	[SCTP_CID_COOKIE_ACK]		= IP_VS_SCTP_COOKIE_ACK,
+	[SCTP_CID_ECN_ECNE]		= IP_VS_SCTP_DATA,
+	[SCTP_CID_ECN_CWR]		= IP_VS_SCTP_DATA,
+	[SCTP_CID_SHUTDOWN_COMPLETE]	= IP_VS_SCTP_SHUTDOWN_COMPLETE,
 };
 
-static struct ipvs_sctp_nextstate
- sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = {
-	/*
-	 * STATE : IP_VS_SCTP_S_NONE
-	 */
-	/*next state *//*event */
-	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ },
-	 },
-	/*
-	 * STATE : IP_VS_SCTP_S_INIT_CLI
-	 * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT)
-	 */
-	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ },
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-	/*
-	 * State : IP_VS_SCTP_S_INIT_SER
-	 * Server sent INIT and waiting for INIT ACK from the client
-	 */
-	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-	/*
-	 * State : IP_VS_SCTP_S_INIT_ACK_CLI
-	 * Client sent INIT ACK and waiting for ECHO from the server
-	 */
-	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 /*
-	  * We have got an INIT from client. From the spec.“Upon receipt of
-	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-	  * an INIT ACK using the same parameters it sent in its  original
-	  * INIT chunk (including its Initiate Tag, unchanged”).
-	  */
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 /*
-	  * INIT_ACK has been resent by the client, let us stay is in
-	  * the same state
-	  */
-	 {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 /*
-	  * INIT_ACK sent by the server, close the connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 /*
-	  * ECHO by client, it should not happen, close the connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 /*
-	  * ECHO by server, this is what we are expecting, move to ECHO_SER
-	  */
-	 {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 /*
-	  * COOKIE ACK from client, it should not happen, close the connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 /*
-	  * Unexpected COOKIE ACK from server, staty in the same state
-	  */
-	 {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-	/*
-	 * State : IP_VS_SCTP_S_INIT_ACK_SER
-	 * Server sent INIT ACK and waiting for ECHO from the client
-	 */
-	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 /*
-	  * We have got an INIT from client. From the spec.“Upon receipt of
-	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-	  * an INIT ACK using the same parameters it sent in its  original
-	  * INIT chunk (including its Initiate Tag, unchanged”).
-	  */
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 /*
-	  * Unexpected INIT_ACK by the client, let us close the connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 /*
-	  * INIT_ACK resent by the server, let us move to same state
-	  */
-	 {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 /*
-	  * Client send the ECHO, this is what we are expecting,
-	  * move to ECHO_CLI
-	  */
-	 {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 /*
-	  * ECHO received from the server, Not sure what to do,
-	  * let us close it
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 /*
-	  * COOKIE ACK from client, let us stay in the same state
-	  */
-	 {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 /*
-	  * COOKIE ACK from server, hmm... this should not happen, lets close
-	  * the connection.
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-	/*
-	 * State : IP_VS_SCTP_S_ECHO_CLI
-	 * Cient  sent ECHO and waiting COOKEI ACK from the Server
-	 */
-	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 /*
-	  * We have got an INIT from client. From the spec.“Upon receipt of
-	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-	  * an INIT ACK using the same parameters it sent in its  original
-	  * INIT chunk (including its Initiate Tag, unchanged”).
-	  */
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 /*
-	  * INIT_ACK has been by the client, let us close the connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 /*
-	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-	  * “If an INIT ACK is received by an endpoint in any state other
-	  * than the COOKIE-WAIT state, the endpoint should discard the
-	  * INIT ACK chunk”. Stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 /*
-	  * Client resent the ECHO, let us stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 /*
-	  * ECHO received from the server, Not sure what to do,
-	  * let us close it
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 /*
-	  * COOKIE ACK from client, this shoud not happen, let's close the
-	  * connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 /*
-	  * COOKIE ACK from server, this is what we are awaiting,lets move to
-	  * ESTABLISHED.
-	  */
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-	/*
-	 * State : IP_VS_SCTP_S_ECHO_SER
-	 * Server sent ECHO and waiting COOKEI ACK from the client
-	 */
-	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 /*
-	  * We have got an INIT from client. From the spec.“Upon receipt of
-	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-	  * an INIT ACK using the same parameters it sent in its  original
-	  * INIT chunk (including its Initiate Tag, unchanged”).
-	  */
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 /*
-	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-	  * “If an INIT ACK is received by an endpoint in any state other
-	  * than the COOKIE-WAIT state, the endpoint should discard the
-	  * INIT ACK chunk”. Stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 /*
-	  * INIT_ACK has been by the server, let us close the connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 /*
-	  * Client sent the ECHO, not sure what to do, let's close the
-	  * connection.
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 /*
-	  * ECHO resent by the server, stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 /*
-	  * COOKIE ACK from client, this is what we are expecting, let's move
-	  * to ESTABLISHED.
-	  */
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 /*
-	  * COOKIE ACK from server, this should not happen, lets close the
-	  * connection.
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-	/*
-	 * State : IP_VS_SCTP_S_ESTABLISHED
-	 * Association established
-	 */
-	{{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 /*
-	  * We have got an INIT from client. From the spec.“Upon receipt of
-	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-	  * an INIT ACK using the same parameters it sent in its  original
-	  * INIT chunk (including its Initiate Tag, unchanged”).
-	  */
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 /*
-	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-	  * “If an INIT ACK is received by an endpoint in any state other
-	  * than the COOKIE-WAIT state, the endpoint should discard the
-	  * INIT ACK chunk”. Stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 /*
-	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
-	  * it will send ERROR chunk. So, stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 /*
-	  * COOKIE ACK from client, not sure what to do stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 /*
-	  * SHUTDOWN from the client, move to SHUDDOWN_CLI
-	  */
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 /*
-	  * SHUTDOWN from the server, move to SHUTDOWN_SER
-	  */
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 /*
-	  * client sent SHUDTDOWN_ACK, this should not happen, let's close
-	  * the connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-	/*
-	 * State : IP_VS_SCTP_S_SHUT_CLI
-	 * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server
-	 */
-	/*
-	 * We received the data chuck, keep the state unchanged. I assume
-	 * that still data chuncks  can be received by both the peers in
-	 * SHUDOWN state
-	 */
-
-	{{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 /*
-	  * We have got an INIT from client. From the spec.“Upon receipt of
-	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-	  * an INIT ACK using the same parameters it sent in its  original
-	  * INIT chunk (including its Initiate Tag, unchanged”).
-	  */
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 /*
-	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-	  * “If an INIT ACK is received by an endpoint in any state other
-	  * than the COOKIE-WAIT state, the endpoint should discard the
-	  * INIT ACK chunk”. Stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 /*
-	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
-	  * it will send ERROR chunk. So, stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 /*
-	  * COOKIE ACK from client, not sure what to do stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 /*
-	  * SHUTDOWN resent from the client, move to SHUDDOWN_CLI
-	  */
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 /*
-	  * SHUTDOWN from the server, move to SHUTDOWN_SER
-	  */
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 /*
-	  * client sent SHUDTDOWN_ACK, this should not happen, let's close
-	  * the connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 /*
-	  * Server sent SHUTDOWN ACK, this is what we are expecting, let's move
-	  * to SHUDOWN_ACK_SER
-	  */
-	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 /*
-	  * SHUTDOWN COM from client, this should not happen, let's close the
-	  * connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-	/*
-	 * State : IP_VS_SCTP_S_SHUT_SER
-	 * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client
-	 */
-	/*
-	 * We received the data chuck, keep the state unchanged. I assume
-	 * that still data chuncks  can be received by both the peers in
-	 * SHUDOWN state
-	 */
-
-	{{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 /*
-	  * We have got an INIT from client. From the spec.“Upon receipt of
-	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-	  * an INIT ACK using the same parameters it sent in its  original
-	  * INIT chunk (including its Initiate Tag, unchanged”).
-	  */
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 /*
-	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-	  * “If an INIT ACK is received by an endpoint in any state other
-	  * than the COOKIE-WAIT state, the endpoint should discard the
-	  * INIT ACK chunk”. Stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 /*
-	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
-	  * it will send ERROR chunk. So, stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 /*
-	  * COOKIE ACK from client, not sure what to do stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 /*
-	  * SHUTDOWN resent from the client, move to SHUDDOWN_CLI
-	  */
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 /*
-	  * SHUTDOWN resent from the server, move to SHUTDOWN_SER
-	  */
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 /*
-	  * client sent SHUDTDOWN_ACK, this is what we are expecting, let's
-	  * move to SHUT_ACK_CLI
-	  */
-	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 /*
-	  * Server sent SHUTDOWN ACK, this should not happen, let's close the
-	  * connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 /*
-	  * SHUTDOWN COM from client, this should not happen, let's close the
-	  * connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-
-	/*
-	 * State : IP_VS_SCTP_S_SHUT_ACK_CLI
-	 * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server
-	 */
-	/*
-	 * We received the data chuck, keep the state unchanged. I assume
-	 * that still data chuncks  can be received by both the peers in
-	 * SHUDOWN state
-	 */
-
-	{{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 /*
-	  * We have got an INIT from client. From the spec.“Upon receipt of
-	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-	  * an INIT ACK using the same parameters it sent in its  original
-	  * INIT chunk (including its Initiate Tag, unchanged”).
-	  */
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 /*
-	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-	  * “If an INIT ACK is received by an endpoint in any state other
-	  * than the COOKIE-WAIT state, the endpoint should discard the
-	  * INIT ACK chunk”. Stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 /*
-	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
-	  * it will send ERROR chunk. So, stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 /*
-	  * COOKIE ACK from client, not sure what to do stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 /*
-	  * SHUTDOWN sent from the client, move to SHUDDOWN_CLI
-	  */
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 /*
-	  * SHUTDOWN sent from the server, move to SHUTDOWN_SER
-	  */
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 /*
-	  * client resent SHUDTDOWN_ACK, let's stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 /*
-	  * Server sent SHUTDOWN ACK, this should not happen, let's close the
-	  * connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 /*
-	  * SHUTDOWN COM from client, this should not happen, let's close the
-	  * connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 /*
-	  * SHUTDOWN COMPLETE from server this is what we are expecting.
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-
-	/*
-	 * State : IP_VS_SCTP_S_SHUT_ACK_SER
-	 * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client
-	 */
-	/*
-	 * We received the data chuck, keep the state unchanged. I assume
-	 * that still data chuncks  can be received by both the peers in
-	 * SHUDOWN state
-	 */
+/* SCTP States:
+ * See RFC 2960, 4. SCTP Association State Diagram
+ *
+ * New states (not in diagram):
+ * - INIT1 state: use shorter timeout for dropped INIT packets
+ * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
+ * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
+ *
+ * The states are as seen in real server. In the diagram, INIT1, INIT,
+ * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
+ *
+ * States as per packets from client (C) and server (S):
+ *
+ * Setup of client connection:
+ * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
+ * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
+ * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
+ * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
+ *
+ * Setup of server connection:
+ * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
+ * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
+ * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
+ */
 
-	{{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 /*
-	  * We have got an INIT from client. From the spec.“Upon receipt of
-	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
-	  * an INIT ACK using the same parameters it sent in its  original
-	  * INIT chunk (including its Initiate Tag, unchanged”).
-	  */
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 /*
-	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
-	  * “If an INIT ACK is received by an endpoint in any state other
-	  * than the COOKIE-WAIT state, the endpoint should discard the
-	  * INIT ACK chunk”. Stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 /*
-	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
-	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
-	  * it will send ERROR chunk. So, stay in the same state
-	  */
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 /*
-	  * COOKIE ACK from client, not sure what to do stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 /*
-	  * SHUTDOWN sent from the client, move to SHUDDOWN_CLI
-	  */
-	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 /*
-	  * SHUTDOWN sent from the server, move to SHUTDOWN_SER
-	  */
-	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 /*
-	  * client sent SHUDTDOWN_ACK, this should not happen let's close
-	  * the connection.
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 /*
-	  * Server resent SHUTDOWN ACK, stay in the same state
-	  */
-	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 /*
-	  * SHUTDOWN COM from client, this what we are expecting, let's close
-	  * the connection
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 /*
-	  * SHUTDOWN COMPLETE from server this should not happen.
-	  */
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 },
-	/*
-	 * State : IP_VS_SCTP_S_CLOSED
-	 */
-	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
-	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
-	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
-	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
-	 }
+#define sNO IP_VS_SCTP_S_NONE
+#define sI1 IP_VS_SCTP_S_INIT1
+#define sIN IP_VS_SCTP_S_INIT
+#define sCS IP_VS_SCTP_S_COOKIE_SENT
+#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
+#define sCW IP_VS_SCTP_S_COOKIE_WAIT
+#define sCO IP_VS_SCTP_S_COOKIE
+#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
+#define sES IP_VS_SCTP_S_ESTABLISHED
+#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
+#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
+#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
+#define sRJ IP_VS_SCTP_S_REJECTED
+#define sCL IP_VS_SCTP_S_CLOSED
+
+static const __u8 sctp_states
+	[IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
+	{ /* INPUT */
+/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
+/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
+/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
+/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
+/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
+/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
+/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
+/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+	},
+	{ /* OUTPUT */
+/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
+/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
+/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
+/* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
+/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
+/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+	},
+	{ /* INPUT-ONLY */
+/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
+/* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
+/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
+/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
+/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
+/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
+/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+	},
 };
 
-/*
- *      Timeout table[state]
- */
+#define IP_VS_SCTP_MAX_RTO	((60 + 1) * HZ)
+
+/* Timeout table[state] */
 static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
-	[IP_VS_SCTP_S_NONE]         =     2 * HZ,
-	[IP_VS_SCTP_S_INIT_CLI]     =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_INIT_SER]     =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_INIT_ACK_CLI] =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_INIT_ACK_SER] =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_ECHO_CLI]     =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_ECHO_SER]     =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_ESTABLISHED]  =    15 * 60 * HZ,
-	[IP_VS_SCTP_S_SHUT_CLI]     =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_SHUT_SER]     =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_SHUT_ACK_CLI] =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_SHUT_ACK_SER] =     1 * 60 * HZ,
-	[IP_VS_SCTP_S_CLOSED]       =    10 * HZ,
-	[IP_VS_SCTP_S_LAST]         =     2 * HZ,
+	[IP_VS_SCTP_S_NONE]			= 2 * HZ,
+	[IP_VS_SCTP_S_INIT1]			= (0 + 3 + 1) * HZ,
+	[IP_VS_SCTP_S_INIT]			= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_COOKIE_SENT]		= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_COOKIE_REPLIED]		= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_COOKIE_WAIT]		= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_COOKIE]			= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_COOKIE_ECHOED]		= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_ESTABLISHED]		= 15 * 60 * HZ,
+	[IP_VS_SCTP_S_SHUTDOWN_SENT]		= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED]	= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]	= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_REJECTED]			= (0 + 3 + 1) * HZ,
+	[IP_VS_SCTP_S_CLOSED]			= IP_VS_SCTP_MAX_RTO,
+	[IP_VS_SCTP_S_LAST]			= 2 * HZ,
 };
 
 static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
-	[IP_VS_SCTP_S_NONE]         =    "NONE",
-	[IP_VS_SCTP_S_INIT_CLI]     =    "INIT_CLI",
-	[IP_VS_SCTP_S_INIT_SER]     =    "INIT_SER",
-	[IP_VS_SCTP_S_INIT_ACK_CLI] =    "INIT_ACK_CLI",
-	[IP_VS_SCTP_S_INIT_ACK_SER] =    "INIT_ACK_SER",
-	[IP_VS_SCTP_S_ECHO_CLI]     =    "COOKIE_ECHO_CLI",
-	[IP_VS_SCTP_S_ECHO_SER]     =    "COOKIE_ECHO_SER",
-	[IP_VS_SCTP_S_ESTABLISHED]  =    "ESTABISHED",
-	[IP_VS_SCTP_S_SHUT_CLI]     =    "SHUTDOWN_CLI",
-	[IP_VS_SCTP_S_SHUT_SER]     =    "SHUTDOWN_SER",
-	[IP_VS_SCTP_S_SHUT_ACK_CLI] =    "SHUTDOWN_ACK_CLI",
-	[IP_VS_SCTP_S_SHUT_ACK_SER] =    "SHUTDOWN_ACK_SER",
-	[IP_VS_SCTP_S_CLOSED]       =    "CLOSED",
-	[IP_VS_SCTP_S_LAST]         =    "BUG!"
+	[IP_VS_SCTP_S_NONE]			= "NONE",
+	[IP_VS_SCTP_S_INIT1]			= "INIT1",
+	[IP_VS_SCTP_S_INIT]			= "INIT",
+	[IP_VS_SCTP_S_COOKIE_SENT]		= "C-SENT",
+	[IP_VS_SCTP_S_COOKIE_REPLIED]		= "C-REPLIED",
+	[IP_VS_SCTP_S_COOKIE_WAIT]		= "C-WAIT",
+	[IP_VS_SCTP_S_COOKIE]			= "COOKIE",
+	[IP_VS_SCTP_S_COOKIE_ECHOED]		= "C-ECHOED",
+	[IP_VS_SCTP_S_ESTABLISHED]		= "ESTABLISHED",
+	[IP_VS_SCTP_S_SHUTDOWN_SENT]		= "S-SENT",
+	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED]	= "S-RECEIVED",
+	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]	= "S-ACK-SENT",
+	[IP_VS_SCTP_S_REJECTED]			= "REJECTED",
+	[IP_VS_SCTP_S_CLOSED]			= "CLOSED",
+	[IP_VS_SCTP_S_LAST]			= "BUG!",
 };
 
 
@@ -943,17 +409,20 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 		}
 	}
 
-	event = sctp_events[chunk_type];
+	event = (chunk_type < sizeof(sctp_events)) ?
+		sctp_events[chunk_type] : IP_VS_SCTP_DATA;
 
-	/*
-	 *  If the direction is IP_VS_DIR_OUTPUT, this event is from server
+	/* Update direction to INPUT_ONLY if necessary
+	 * or delete NO_OUTPUT flag if output packet detected
 	 */
-	if (direction == IP_VS_DIR_OUTPUT)
-		event++;
-	/*
-	 * get next state
-	 */
-	next_state = sctp_states_table[cp->state][event].next_state;
+	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
+		if (direction == IP_VS_DIR_OUTPUT)
+			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
+		else
+			direction = IP_VS_DIR_INPUT_ONLY;
+	}
+
+	next_state = sctp_states[direction][event][cp->state];
 
 	if (next_state != cp->state) {
 		struct ip_vs_dest *dest = cp->dest;
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 50a1594..e3a6972 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -39,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 	struct net *net;
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
+	struct netns_ipvs *ipvs;
 
 	th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
@@ -46,14 +47,15 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		return 0;
 	}
 	net = skb_net(skb);
+	ipvs = net_ipvs(net);
 	/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
 	rcu_read_lock();
-	if (th->syn &&
+	if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst &&
 	    (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
 				      &iph->daddr, th->dest))) {
 		int ignored;
 
-		if (ip_vs_todrop(net_ipvs(net))) {
+		if (ip_vs_todrop(ipvs)) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -401,7 +403,7 @@ static struct tcp_states_t tcp_states [] = {
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
-/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
 
 /*	OUTPUT */
@@ -415,7 +417,7 @@ static struct tcp_states_t tcp_states [] = {
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
-/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
@@ -424,7 +426,7 @@ static struct tcp_states_t tcp_states_dos [] = {
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
-/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
+/*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 
 /*	OUTPUT */
@@ -438,7 +440,7 @@ static struct tcp_states_t tcp_states_dos [] = {
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
-/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index c35986c..176b87c 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -55,7 +55,8 @@ static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
  * Round-Robin Scheduling
  */
 static struct ip_vs_dest *
-ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		  struct ip_vs_iphdr *iph)
 {
 	struct list_head *p;
 	struct ip_vs_dest *dest, *last;
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index f320592..e446b9f 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -44,7 +44,7 @@
 #include <net/ip_vs.h>
 
 
-static inline unsigned int
+static inline int
 ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
 {
 	/*
@@ -59,10 +59,11 @@ ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
  *	Weighted Least Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		   struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_dest *dest, *least;
-	unsigned int loh, doh;
+	int loh, doh;
 
 	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
@@ -98,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 		doh = ip_vs_sed_dest_overhead(dest);
-		if (loh * atomic_read(&dest->weight) >
-		    doh * atomic_read(&least->weight)) {
+		if ((__s64)loh * atomic_read(&dest->weight) >
+		    (__s64)doh * atomic_read(&least->weight)) {
 			least = dest;
 			loh = doh;
 		}
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index a65edfe4..cc65b2f 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -48,6 +48,10 @@
 
 #include <net/ip_vs.h>
 
+#include <net/tcp.h>
+#include <linux/udp.h>
+#include <linux/sctp.h>
+
 
 /*
  *      IPVS SH bucket
@@ -71,10 +75,19 @@ struct ip_vs_sh_state {
 	struct ip_vs_sh_bucket		buckets[IP_VS_SH_TAB_SIZE];
 };
 
+/* Helper function to determine if server is unavailable */
+static inline bool is_unavailable(struct ip_vs_dest *dest)
+{
+	return atomic_read(&dest->weight) <= 0 ||
+	       dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
 /*
  *	Returns hash value for IPVS SH entry
  */
-static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr)
+static inline unsigned int
+ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
+		 __be16 port, unsigned int offset)
 {
 	__be32 addr_fold = addr->ip;
 
@@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
 		addr_fold = addr->ip6[0]^addr->ip6[1]^
 			    addr->ip6[2]^addr->ip6[3];
 #endif
-	return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK;
+	return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
+		IP_VS_SH_TAB_MASK;
 }
 
 
@@ -91,12 +105,61 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
  *      Get ip_vs_dest associated with supplied parameters.
  */
 static inline struct ip_vs_dest *
-ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
+ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+	     const union nf_inet_addr *addr, __be16 port)
 {
-	return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
+	unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
+	struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest);
+
+	return (!dest || is_unavailable(dest)) ? NULL : dest;
 }
 
 
+/* As ip_vs_sh_get, but with fallback if selected server is unavailable
+ *
+ * The fallback strategy loops around the table starting from a "random"
+ * point (in fact, it is chosen to be the original hash value to make the
+ * algorithm deterministic) to find a new server.
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+		      const union nf_inet_addr *addr, __be16 port)
+{
+	unsigned int offset, roffset;
+	unsigned int hash, ihash;
+	struct ip_vs_dest *dest;
+
+	/* first try the dest it's supposed to go to */
+	ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
+	dest = rcu_dereference(s->buckets[ihash].dest);
+	if (!dest)
+		return NULL;
+	if (!is_unavailable(dest))
+		return dest;
+
+	IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+
+	/* if the original dest is unavailable, loop around the table
+	 * starting from ihash to find a new dest
+	 */
+	for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
+		roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE;
+		hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset);
+		dest = rcu_dereference(s->buckets[hash].dest);
+		if (!dest)
+			break;
+		if (!is_unavailable(dest))
+			return dest;
+		IP_VS_DBG_BUF(6, "SH: selected unavailable "
+			      "server %s:%d (offset %d), reselecting",
+			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+			      ntohs(dest->port), roffset);
+	}
+
+	return NULL;
+}
+
 /*
  *      Assign all the hash buckets of the specified table with the service.
  */
@@ -213,13 +276,39 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
 }
 
 
-/*
- *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
- *      consider that the server is overloaded here.
- */
-static inline int is_overloaded(struct ip_vs_dest *dest)
+/* Helper function to get port number */
+static inline __be16
+ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
 {
-	return dest->flags & IP_VS_DEST_F_OVERLOAD;
+	__be16 port;
+	struct tcphdr _tcph, *th;
+	struct udphdr _udph, *uh;
+	sctp_sctphdr_t _sctph, *sh;
+
+	switch (iph->protocol) {
+	case IPPROTO_TCP:
+		th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+		if (unlikely(th == NULL))
+			return 0;
+		port = th->source;
+		break;
+	case IPPROTO_UDP:
+		uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+		if (unlikely(uh == NULL))
+			return 0;
+		port = uh->source;
+		break;
+	case IPPROTO_SCTP:
+		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+		if (unlikely(sh == NULL))
+			return 0;
+		port = sh->source;
+		break;
+	default:
+		port = 0;
+	}
+
+	return port;
 }
 
 
@@ -227,28 +316,32 @@ static inline int is_overloaded(struct ip_vs_dest *dest)
  *      Source Hashing scheduling
  */
 static struct ip_vs_dest *
-ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		  struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_sh_state *s;
-	struct ip_vs_iphdr iph;
-
-	ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
+	__be16 port = 0;
 
 	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
 
+	if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT)
+		port = ip_vs_sh_get_port(skb, iph);
+
 	s = (struct ip_vs_sh_state *) svc->sched_data;
-	dest = ip_vs_sh_get(svc->af, s, &iph.saddr);
-	if (!dest
-	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
-	    || atomic_read(&dest->weight) <= 0
-	    || is_overloaded(dest)) {
+
+	if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
+		dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
+	else
+		dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
+
+	if (!dest) {
 		ip_vs_scheduler_err(svc, "no destination available");
 		return NULL;
 	}
 
 	IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph.saddr),
+		      IP_VS_DBG_ADDR(svc->af, &iph->saddr),
 		      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 		      ntohs(dest->port));
 
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index f6046d9..f63c238 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -425,6 +425,16 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
 	return sb;
 }
 
+/* Check if connection is controlled by persistence */
+static inline bool in_persistence(struct ip_vs_conn *cp)
+{
+	for (cp = cp->control; cp; cp = cp->control) {
+		if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+			return true;
+	}
+	return false;
+}
+
 /* Check if conn should be synced.
  * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
  * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
@@ -447,6 +457,8 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
 	/* Check if we sync in current state */
 	if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
 		force = 0;
+	else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
+		return 0;
 	else if (likely(cp->protocol == IPPROTO_TCP)) {
 		if (!((1 << cp->state) &
 		      ((1 << IP_VS_TCP_S_ESTABLISHED) |
@@ -461,9 +473,10 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
 	} else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
 		if (!((1 << cp->state) &
 		      ((1 << IP_VS_SCTP_S_ESTABLISHED) |
-		       (1 << IP_VS_SCTP_S_CLOSED) |
-		       (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
-		       (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
+		       (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
+		       (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
+		       (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
+		       (1 << IP_VS_SCTP_S_CLOSED))))
 			return 0;
 		force = cp->state != cp->old_state;
 		if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
@@ -1624,12 +1637,9 @@ static int sync_thread_master(void *data)
 			continue;
 		}
 		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
-			int ret = 0;
-
-			__wait_event_interruptible(*sk_sleep(sk),
+			int ret = __wait_event_interruptible(*sk_sleep(sk),
 						   sock_writeable(sk) ||
-						   kthread_should_stop(),
-						   ret);
+						   kthread_should_stop());
 			if (unlikely(kthread_should_stop()))
 				goto done;
 		}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index c60a81c..b5b4650 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -31,10 +31,11 @@
  *	Weighted Least Connection scheduling
  */
 static struct ip_vs_dest *
-ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		   struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_dest *dest, *least;
-	unsigned int loh, doh;
+	int loh, doh;
 
 	IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
 
@@ -70,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 		doh = ip_vs_dest_conn_overhead(dest);
-		if (loh * atomic_read(&dest->weight) >
-		    doh * atomic_read(&least->weight)) {
+		if ((__s64)loh * atomic_read(&dest->weight) >
+		    (__s64)doh * atomic_read(&least->weight)) {
 			least = dest;
 			loh = doh;
 		}
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 0e68555..0546cd5 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -162,7 +162,8 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc,
  *    Weighted Round-Robin Scheduling
  */
 static struct ip_vs_dest *
-ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		   struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_dest *dest, *last, *stop = NULL;
 	struct ip_vs_wrr_mark *mark = svc->sched_data;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index b75ff64..c47444e 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr		=	cp->daddr.ip;
 	iph->saddr		=	saddr;
 	iph->ttl		=	old_iph->ttl;
-	ip_select_ident(iph, &rt->dst, NULL);
+	ip_select_ident(skb, &rt->dst, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 2d3030a..a4b5e2a 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -39,21 +39,23 @@ static struct ctl_table acct_sysctl_table[] = {
 unsigned int
 seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
 {
-	struct nf_conn_counter *acct;
+	struct nf_conn_acct *acct;
+	struct nf_conn_counter *counter;
 
 	acct = nf_conn_acct_find(ct);
 	if (!acct)
 		return 0;
 
+	counter = acct->counter;
 	return seq_printf(s, "packets=%llu bytes=%llu ",
-			  (unsigned long long)atomic64_read(&acct[dir].packets),
-			  (unsigned long long)atomic64_read(&acct[dir].bytes));
+			  (unsigned long long)atomic64_read(&counter[dir].packets),
+			  (unsigned long long)atomic64_read(&counter[dir].bytes));
 };
 EXPORT_SYMBOL_GPL(seq_print_acct);
 
 static struct nf_ct_ext_type acct_extend __read_mostly = {
-	.len	= sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]),
-	.align	= __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]),
+	.len	= sizeof(struct nf_conn_acct),
+	.align	= __alignof__(struct nf_conn_acct),
 	.id	= NF_CT_EXT_ACCT,
 };
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0283bae..43549eb 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -39,6 +39,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
@@ -47,6 +48,7 @@
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
@@ -238,7 +240,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	nf_conntrack_free(ct);
 }
 
-void nf_ct_delete_from_lists(struct nf_conn *ct)
+static void nf_ct_delete_from_lists(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 
@@ -253,7 +255,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
 			     &net->ct.dying);
 	spin_unlock_bh(&nf_conntrack_lock);
 }
-EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
 
 static void death_by_event(unsigned long ul_conntrack)
 {
@@ -275,7 +276,7 @@ static void death_by_event(unsigned long ul_conntrack)
 	nf_ct_put(ct);
 }
 
-void nf_ct_dying_timeout(struct nf_conn *ct)
+static void nf_ct_dying_timeout(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
@@ -288,27 +289,33 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
 		(prandom_u32() % net->ct.sysctl_events_retry_timeout);
 	add_timer(&ecache->timeout);
 }
-EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
 
-static void death_by_timeout(unsigned long ul_conntrack)
+bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 {
-	struct nf_conn *ct = (void *)ul_conntrack;
 	struct nf_conn_tstamp *tstamp;
 
 	tstamp = nf_conn_tstamp_find(ct);
 	if (tstamp && tstamp->stop == 0)
 		tstamp->stop = ktime_to_ns(ktime_get_real());
 
-	if (!test_bit(IPS_DYING_BIT, &ct->status) &&
-	    unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
+	if (!nf_ct_is_dying(ct) &&
+	    unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
+	    portid, report) < 0)) {
 		/* destroy event was not delivered */
 		nf_ct_delete_from_lists(ct);
 		nf_ct_dying_timeout(ct);
-		return;
+		return false;
 	}
 	set_bit(IPS_DYING_BIT, &ct->status);
 	nf_ct_delete_from_lists(ct);
 	nf_ct_put(ct);
+	return true;
+}
+EXPORT_SYMBOL_GPL(nf_ct_delete);
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+	nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
 }
 
 /*
@@ -643,10 +650,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 		return dropped;
 
 	if (del_timer(&ct->timeout)) {
-		death_by_timeout((unsigned long)ct);
-		/* Check if we indeed killed this entry. Reliable event
-		   delivery may have inserted it into the dying list. */
-		if (test_bit(IPS_DYING_BIT, &ct->status)) {
+		if (nf_ct_delete(ct, 0, 0)) {
 			dropped = 1;
 			NF_CT_STAT_INC_ATOMIC(net, early_drop);
 		}
@@ -760,9 +764,10 @@ void nf_conntrack_free(struct nf_conn *ct)
 	struct net *net = nf_ct_net(ct);
 
 	nf_ct_ext_destroy(ct);
-	atomic_dec(&net->ct.count);
 	nf_ct_ext_free(ct);
 	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+	smp_mb__before_atomic_dec();
+	atomic_dec(&net->ct.count);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
 
@@ -796,6 +801,11 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	if (IS_ERR(ct))
 		return (struct nf_conntrack_tuple_hash *)ct;
 
+	if (tmpl && nfct_synproxy(tmpl)) {
+		nfct_seqadj_ext_add(ct);
+		nfct_synproxy_ext_add(ct);
+	}
+
 	timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
 	if (timeout_ext)
 		timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
@@ -1100,12 +1110,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 
 acct:
 	if (do_acct) {
-		struct nf_conn_counter *acct;
+		struct nf_conn_acct *acct;
 
 		acct = nf_conn_acct_find(ct);
 		if (acct) {
-			atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
-			atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes);
+			struct nf_conn_counter *counter = acct->counter;
+
+			atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
+			atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
 		}
 	}
 }
@@ -1117,13 +1129,15 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
 		       int do_acct)
 {
 	if (do_acct) {
-		struct nf_conn_counter *acct;
+		struct nf_conn_acct *acct;
 
 		acct = nf_conn_acct_find(ct);
 		if (acct) {
-			atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
+			struct nf_conn_counter *counter = acct->counter;
+
+			atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
 			atomic64_add(skb->len - skb_network_offset(skb),
-				     &acct[CTINFO2DIR(ctinfo)].bytes);
+				     &counter[CTINFO2DIR(ctinfo)].bytes);
 		}
 	}
 
@@ -1192,7 +1206,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
 #endif
 
 /* Used by ipt_REJECT and ip6t_REJECT. */
-static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -1244,7 +1258,7 @@ found:
 
 void nf_ct_iterate_cleanup(struct net *net,
 			   int (*iter)(struct nf_conn *i, void *data),
-			   void *data)
+			   void *data, u32 portid, int report)
 {
 	struct nf_conn *ct;
 	unsigned int bucket = 0;
@@ -1252,7 +1266,8 @@ void nf_ct_iterate_cleanup(struct net *net,
 	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
-			death_by_timeout((unsigned long)ct);
+			nf_ct_delete(ct, portid, report);
+
 		/* ... else the timer will get him soon. */
 
 		nf_ct_put(ct);
@@ -1260,30 +1275,6 @@ void nf_ct_iterate_cleanup(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
 
-struct __nf_ct_flush_report {
-	u32 portid;
-	int report;
-};
-
-static int kill_report(struct nf_conn *i, void *data)
-{
-	struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
-	struct nf_conn_tstamp *tstamp;
-
-	tstamp = nf_conn_tstamp_find(i);
-	if (tstamp && tstamp->stop == 0)
-		tstamp->stop = ktime_to_ns(ktime_get_real());
-
-	/* If we fail to deliver the event, death_by_timeout() will retry */
-	if (nf_conntrack_event_report(IPCT_DESTROY, i,
-				      fr->portid, fr->report) < 0)
-		return 1;
-
-	/* Avoid the delivery of the destroy event in death_by_timeout(). */
-	set_bit(IPS_DYING_BIT, &i->status);
-	return 1;
-}
-
 static int kill_all(struct nf_conn *i, void *data)
 {
 	return 1;
@@ -1301,11 +1292,7 @@ EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
 
 void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
 {
-	struct __nf_ct_flush_report fr = {
-		.portid	= portid,
-		.report = report,
-	};
-	nf_ct_iterate_cleanup(net, kill_report, &fr);
+	nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
 
@@ -1351,6 +1338,7 @@ void nf_conntrack_cleanup_end(void)
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
 	nf_conntrack_proto_fini();
+	nf_conntrack_seqadj_fini();
 	nf_conntrack_labels_fini();
 	nf_conntrack_helper_fini();
 	nf_conntrack_timeout_fini();
@@ -1386,7 +1374,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
 i_see_dead_people:
 	busy = 0;
 	list_for_each_entry(net, net_exit_list, exit_list) {
-		nf_ct_iterate_cleanup(net, kill_all, NULL);
+		nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
 		nf_ct_release_dying_list(net);
 		if (atomic_read(&net->ct.count) != 0)
 			busy = 1;
@@ -1556,6 +1544,10 @@ int nf_conntrack_init_start(void)
 	if (ret < 0)
 		goto err_labels;
 
+	ret = nf_conntrack_seqadj_init();
+	if (ret < 0)
+		goto err_seqadj;
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	ret = nf_ct_extend_register(&nf_ct_zone_extend);
 	if (ret < 0)
@@ -1580,6 +1572,8 @@ err_proto:
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 err_extend:
 #endif
+	nf_conntrack_seqadj_fini();
+err_seqadj:
 	nf_conntrack_labels_fini();
 err_labels:
 	nf_conntrack_helper_fini();
@@ -1602,9 +1596,6 @@ void nf_conntrack_init_end(void)
 	/* For use by REJECT target */
 	RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
 	RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
-
-	/* Howto get NAT offsets */
-	RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
 }
 
 /*
@@ -1691,8 +1682,3 @@ err_slabname:
 err_stat:
 	return ret;
 }
-
-s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
-			enum ip_conntrack_dir dir,
-			u32 seq);
-EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index c63b618..4fd1ca9 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -293,6 +293,11 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
 		       sizeof(exp->tuple.dst.u3) - len);
 
 	exp->tuple.dst.u.all = *dst;
+
+#ifdef CONFIG_NF_NAT_NEEDED
+	memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
+	memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
+#endif
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
 
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 6b21707..b8a0924 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -55,10 +55,14 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
 				struct nf_conntrack_expect *exp);
 EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
 
-static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char);
-static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char);
+static int try_rfc959(const char *, size_t, struct nf_conntrack_man *,
+		      char, unsigned int *);
+static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *,
+		       char, unsigned int *);
+static int try_eprt(const char *, size_t, struct nf_conntrack_man *,
+		    char, unsigned int *);
 static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
-			     char);
+			     char, unsigned int *);
 
 static struct ftp_search {
 	const char *pattern;
@@ -66,7 +70,7 @@ static struct ftp_search {
 	char skip;
 	char term;
 	enum nf_ct_ftp_type ftptype;
-	int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char);
+	int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *);
 } search[IP_CT_DIR_MAX][2] = {
 	[IP_CT_DIR_ORIGINAL] = {
 		{
@@ -90,10 +94,8 @@ static struct ftp_search {
 		{
 			.pattern	= "227 ",
 			.plen		= sizeof("227 ") - 1,
-			.skip		= '(',
-			.term		= ')',
 			.ftptype	= NF_CT_FTP_PASV,
-			.getnum		= try_rfc959,
+			.getnum		= try_rfc1123,
 		},
 		{
 			.pattern	= "229 ",
@@ -132,8 +134,9 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[],
 			i++;
 		else {
 			/* Unexpected character; true if it's the
-			   terminator and we're finished. */
-			if (*data == term && i == array_size - 1)
+			   terminator (or we don't care about one)
+			   and we're finished. */
+			if ((*data == term || !term) && i == array_size - 1)
 				return len;
 
 			pr_debug("Char %u (got %u nums) `%u' unexpected\n",
@@ -148,7 +151,8 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[],
 
 /* Returns 0, or length of numbers: 192,168,1,1,5,6 */
 static int try_rfc959(const char *data, size_t dlen,
-		      struct nf_conntrack_man *cmd, char term)
+		      struct nf_conntrack_man *cmd, char term,
+		      unsigned int *offset)
 {
 	int length;
 	u_int32_t array[6];
@@ -163,6 +167,33 @@ static int try_rfc959(const char *data, size_t dlen,
 	return length;
 }
 
+/*
+ * From RFC 1123:
+ * The format of the 227 reply to a PASV command is not
+ * well standardized.  In particular, an FTP client cannot
+ * assume that the parentheses shown on page 40 of RFC-959
+ * will be present (and in fact, Figure 3 on page 43 omits
+ * them).  Therefore, a User-FTP program that interprets
+ * the PASV reply must scan the reply for the first digit
+ * of the host and port numbers.
+ */
+static int try_rfc1123(const char *data, size_t dlen,
+		       struct nf_conntrack_man *cmd, char term,
+		       unsigned int *offset)
+{
+	int i;
+	for (i = 0; i < dlen; i++)
+		if (isdigit(data[i]))
+			break;
+
+	if (i == dlen)
+		return 0;
+
+	*offset += i;
+
+	return try_rfc959(data + i, dlen - i, cmd, 0, offset);
+}
+
 /* Grab port: number up to delimiter */
 static int get_port(const char *data, int start, size_t dlen, char delim,
 		    __be16 *port)
@@ -191,7 +222,7 @@ static int get_port(const char *data, int start, size_t dlen, char delim,
 
 /* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */
 static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
-		    char term)
+		    char term, unsigned int *offset)
 {
 	char delim;
 	int length;
@@ -239,7 +270,8 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
 
 /* Returns 0, or length of numbers: |||6446| */
 static int try_epsv_response(const char *data, size_t dlen,
-			     struct nf_conntrack_man *cmd, char term)
+			     struct nf_conntrack_man *cmd, char term,
+			     unsigned int *offset)
 {
 	char delim;
 
@@ -261,9 +293,10 @@ static int find_pattern(const char *data, size_t dlen,
 			unsigned int *numlen,
 			struct nf_conntrack_man *cmd,
 			int (*getnum)(const char *, size_t,
-				      struct nf_conntrack_man *, char))
+				      struct nf_conntrack_man *, char,
+				      unsigned int *))
 {
-	size_t i;
+	size_t i = plen;
 
 	pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen);
 	if (dlen == 0)
@@ -293,16 +326,18 @@ static int find_pattern(const char *data, size_t dlen,
 	pr_debug("Pattern matches!\n");
 	/* Now we've found the constant string, try to skip
 	   to the 'skip' character */
-	for (i = plen; data[i] != skip; i++)
-		if (i == dlen - 1) return -1;
+	if (skip) {
+		for (i = plen; data[i] != skip; i++)
+			if (i == dlen - 1) return -1;
 
-	/* Skip over the last character */
-	i++;
+		/* Skip over the last character */
+		i++;
+	}
 
 	pr_debug("Skipped up to `%c'!\n", skip);
 
 	*numoff = i;
-	*numlen = getnum(data + i, dlen - i, cmd, term);
+	*numlen = getnum(data + i, dlen - i, cmd, term, numoff);
 	if (!*numlen)
 		return -1;
 
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index bdebd03..70866d1 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 				   flowi6_to_flowi(&fl1), false)) {
 			if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
 					   flowi6_to_flowi(&fl2), false)) {
-				if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
-					    sizeof(rt1->rt6i_gateway)) &&
+				if (ipv6_addr_equal(rt6_nexthop(rt1),
+						    rt6_nexthop(rt2)) &&
 				    rt1->dst.dev == rt2->dst.dev)
 					ret = 1;
 				dst_release(&rt2->dst);
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 355d2ef..bb53f12 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -8,12 +8,8 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/ctype.h>
 #include <linux/export.h>
-#include <linux/jhash.h>
-#include <linux/spinlock.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_labels.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ecf065f..08870b8 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -37,6 +37,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
@@ -210,13 +211,23 @@ nla_put_failure:
 }
 
 static int
-dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes,
-	      enum ip_conntrack_dir dir)
+dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct,
+	      enum ip_conntrack_dir dir, int type)
 {
-	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
+	enum ctattr_type attr = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
+	struct nf_conn_counter *counter = acct->counter;
 	struct nlattr *nest_count;
+	u64 pkts, bytes;
+
+	if (type == IPCTNL_MSG_CT_GET_CTRZERO) {
+		pkts = atomic64_xchg(&counter[dir].packets, 0);
+		bytes = atomic64_xchg(&counter[dir].bytes, 0);
+	} else {
+		pkts = atomic64_read(&counter[dir].packets);
+		bytes = atomic64_read(&counter[dir].bytes);
+	}
 
-	nest_count = nla_nest_start(skb, type | NLA_F_NESTED);
+	nest_count = nla_nest_start(skb, attr | NLA_F_NESTED);
 	if (!nest_count)
 		goto nla_put_failure;
 
@@ -233,24 +244,19 @@ nla_put_failure:
 }
 
 static int
-ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
-			enum ip_conntrack_dir dir, int type)
+ctnetlink_dump_acct(struct sk_buff *skb, const struct nf_conn *ct, int type)
 {
-	struct nf_conn_counter *acct;
-	u64 pkts, bytes;
+	struct nf_conn_acct *acct = nf_conn_acct_find(ct);
 
-	acct = nf_conn_acct_find(ct);
 	if (!acct)
 		return 0;
 
-	if (type == IPCTNL_MSG_CT_GET_CTRZERO) {
-		pkts = atomic64_xchg(&acct[dir].packets, 0);
-		bytes = atomic64_xchg(&acct[dir].bytes, 0);
-	} else {
-		pkts = atomic64_read(&acct[dir].packets);
-		bytes = atomic64_read(&acct[dir].bytes);
-	}
-	return dump_counters(skb, pkts, bytes, dir);
+	if (dump_counters(skb, acct, IP_CT_DIR_ORIGINAL, type) < 0)
+		return -1;
+	if (dump_counters(skb, acct, IP_CT_DIR_REPLY, type) < 0)
+		return -1;
+
+	return 0;
 }
 
 static int
@@ -381,9 +387,8 @@ nla_put_failure:
 	return -1;
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
 static int
-dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
+dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type)
 {
 	struct nlattr *nest_parms;
 
@@ -391,12 +396,12 @@ dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
 	if (!nest_parms)
 		goto nla_put_failure;
 
-	if (nla_put_be32(skb, CTA_NAT_SEQ_CORRECTION_POS,
-			 htonl(natseq->correction_pos)) ||
-	    nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_BEFORE,
-			 htonl(natseq->offset_before)) ||
-	    nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_AFTER,
-			 htonl(natseq->offset_after)))
+	if (nla_put_be32(skb, CTA_SEQADJ_CORRECTION_POS,
+			 htonl(seq->correction_pos)) ||
+	    nla_put_be32(skb, CTA_SEQADJ_OFFSET_BEFORE,
+			 htonl(seq->offset_before)) ||
+	    nla_put_be32(skb, CTA_SEQADJ_OFFSET_AFTER,
+			 htonl(seq->offset_after)))
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nest_parms);
@@ -408,27 +413,24 @@ nla_put_failure:
 }
 
 static inline int
-ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
+ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	struct nf_nat_seq *natseq;
-	struct nf_conn_nat *nat = nfct_nat(ct);
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	struct nf_ct_seqadj *seq;
 
-	if (!(ct->status & IPS_SEQ_ADJUST) || !nat)
+	if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
 		return 0;
 
-	natseq = &nat->seq[IP_CT_DIR_ORIGINAL];
-	if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1)
+	seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
+	if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
 		return -1;
 
-	natseq = &nat->seq[IP_CT_DIR_REPLY];
-	if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1)
+	seq = &seqadj->seq[IP_CT_DIR_REPLY];
+	if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
 		return -1;
 
 	return 0;
 }
-#else
-#define ctnetlink_dump_nat_seq_adj(a, b) (0)
-#endif
 
 static inline int
 ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -491,8 +493,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 
 	if (ctnetlink_dump_status(skb, ct) < 0 ||
 	    ctnetlink_dump_timeout(skb, ct) < 0 ||
-	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 ||
-	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 ||
+	    ctnetlink_dump_acct(skb, ct, type) < 0 ||
 	    ctnetlink_dump_timestamp(skb, ct) < 0 ||
 	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
@@ -502,7 +503,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	    ctnetlink_dump_id(skb, ct) < 0 ||
 	    ctnetlink_dump_use(skb, ct) < 0 ||
 	    ctnetlink_dump_master(skb, ct) < 0 ||
-	    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+	    ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
 		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
@@ -533,7 +534,7 @@ ctnetlink_proto_size(const struct nf_conn *ct)
 }
 
 static inline size_t
-ctnetlink_counters_size(const struct nf_conn *ct)
+ctnetlink_acct_size(const struct nf_conn *ct)
 {
 	if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT))
 		return 0;
@@ -582,7 +583,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 	       + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
-	       + ctnetlink_counters_size(ct)
+	       + ctnetlink_acct_size(ct)
 	       + ctnetlink_timestamp_size(ct)
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
 	       + nla_total_size(0) /* CTA_PROTOINFO */
@@ -676,10 +677,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		goto nla_put_failure;
 
 	if (events & (1 << IPCT_DESTROY)) {
-		if (ctnetlink_dump_counters(skb, ct,
-					    IP_CT_DIR_ORIGINAL, type) < 0 ||
-		    ctnetlink_dump_counters(skb, ct,
-					    IP_CT_DIR_REPLY, type) < 0 ||
+		if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
 		    ctnetlink_dump_timestamp(skb, ct) < 0)
 			goto nla_put_failure;
 	} else {
@@ -707,8 +705,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		    ctnetlink_dump_master(skb, ct) < 0)
 			goto nla_put_failure;
 
-		if (events & (1 << IPCT_NATSEQADJ) &&
-		    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+		if (events & (1 << IPCT_SEQADJ) &&
+		    ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
 			goto nla_put_failure;
 	}
 
@@ -828,7 +826,9 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple)
 	struct nf_conntrack_l3proto *l3proto;
 	int ret = 0;
 
-	nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
+	ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
+	if (ret < 0)
+		return ret;
 
 	rcu_read_lock();
 	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -895,7 +895,9 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 
 	memset(tuple, 0, sizeof(*tuple));
 
-	nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
+	err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
+	if (err < 0)
+		return err;
 
 	if (!tb[CTA_TUPLE_IP])
 		return -EINVAL;
@@ -946,9 +948,12 @@ static inline int
 ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
 		     struct nlattr **helpinfo)
 {
+	int err;
 	struct nlattr *tb[CTA_HELP_MAX+1];
 
-	nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
+	err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
+	if (err < 0)
+		return err;
 
 	if (!tb[CTA_HELP_NAME])
 		return -EINVAL;
@@ -1031,21 +1036,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		}
 	}
 
-	if (del_timer(&ct->timeout)) {
-		if (nf_conntrack_event_report(IPCT_DESTROY, ct,
-					      NETLINK_CB(skb).portid,
-					      nlmsg_report(nlh)) < 0) {
-			nf_ct_delete_from_lists(ct);
-			/* we failed to report the event, try later */
-			nf_ct_dying_timeout(ct);
-			nf_ct_put(ct);
-			return 0;
-		}
-		/* death_by_timeout would report the event again */
-		set_bit(IPS_DYING_BIT, &ct->status);
-		nf_ct_delete_from_lists(ct);
-		nf_ct_put(ct);
-	}
+	if (del_timer(&ct->timeout))
+		nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
+
 	nf_ct_put(ct);
 
 	return 0;
@@ -1431,7 +1424,9 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
 	struct nf_conntrack_l4proto *l4proto;
 	int err = 0;
 
-	nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
+	err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
+	if (err < 0)
+		return err;
 
 	rcu_read_lock();
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -1442,63 +1437,65 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
 	return err;
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
-static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = {
-	[CTA_NAT_SEQ_CORRECTION_POS]	= { .type = NLA_U32 },
-	[CTA_NAT_SEQ_OFFSET_BEFORE]	= { .type = NLA_U32 },
-	[CTA_NAT_SEQ_OFFSET_AFTER]	= { .type = NLA_U32 },
+static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = {
+	[CTA_SEQADJ_CORRECTION_POS]	= { .type = NLA_U32 },
+	[CTA_SEQADJ_OFFSET_BEFORE]	= { .type = NLA_U32 },
+	[CTA_SEQADJ_OFFSET_AFTER]	= { .type = NLA_U32 },
 };
 
 static inline int
-change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr)
+change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr)
 {
-	struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
+	int err;
+	struct nlattr *cda[CTA_SEQADJ_MAX+1];
 
-	nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy);
+	err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy);
+	if (err < 0)
+		return err;
 
-	if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
+	if (!cda[CTA_SEQADJ_CORRECTION_POS])
 		return -EINVAL;
 
-	natseq->correction_pos =
-		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS]));
+	seq->correction_pos =
+		ntohl(nla_get_be32(cda[CTA_SEQADJ_CORRECTION_POS]));
 
-	if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE])
+	if (!cda[CTA_SEQADJ_OFFSET_BEFORE])
 		return -EINVAL;
 
-	natseq->offset_before =
-		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
+	seq->offset_before =
+		ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_BEFORE]));
 
-	if (!cda[CTA_NAT_SEQ_OFFSET_AFTER])
+	if (!cda[CTA_SEQADJ_OFFSET_AFTER])
 		return -EINVAL;
 
-	natseq->offset_after =
-		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
+	seq->offset_after =
+		ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_AFTER]));
 
 	return 0;
 }
 
 static int
-ctnetlink_change_nat_seq_adj(struct nf_conn *ct,
-			     const struct nlattr * const cda[])
+ctnetlink_change_seq_adj(struct nf_conn *ct,
+			 const struct nlattr * const cda[])
 {
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
 	int ret = 0;
-	struct nf_conn_nat *nat = nfct_nat(ct);
 
-	if (!nat)
+	if (!seqadj)
 		return 0;
 
-	if (cda[CTA_NAT_SEQ_ADJ_ORIG]) {
-		ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL],
-					 cda[CTA_NAT_SEQ_ADJ_ORIG]);
+	if (cda[CTA_SEQ_ADJ_ORIG]) {
+		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
+				     cda[CTA_SEQ_ADJ_ORIG]);
 		if (ret < 0)
 			return ret;
 
 		ct->status |= IPS_SEQ_ADJUST;
 	}
 
-	if (cda[CTA_NAT_SEQ_ADJ_REPLY]) {
-		ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY],
-					 cda[CTA_NAT_SEQ_ADJ_REPLY]);
+	if (cda[CTA_SEQ_ADJ_REPLY]) {
+		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
+				     cda[CTA_SEQ_ADJ_REPLY]);
 		if (ret < 0)
 			return ret;
 
@@ -1507,7 +1504,6 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct,
 
 	return 0;
 }
-#endif
 
 static int
 ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[])
@@ -1573,13 +1569,12 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
 		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
 #endif
 
-#ifdef CONFIG_NF_NAT_NEEDED
-	if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
-		err = ctnetlink_change_nat_seq_adj(ct, cda);
+	if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+		err = ctnetlink_change_seq_adj(ct, cda);
 		if (err < 0)
 			return err;
 	}
-#endif
+
 	if (cda[CTA_LABELS]) {
 		err = ctnetlink_attach_labels(ct, cda);
 		if (err < 0)
@@ -1684,13 +1679,11 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 			goto err2;
 	}
 
-#ifdef CONFIG_NF_NAT_NEEDED
-	if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
-		err = ctnetlink_change_nat_seq_adj(ct, cda);
+	if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+		err = ctnetlink_change_seq_adj(ct, cda);
 		if (err < 0)
 			goto err2;
 	}
-#endif
 
 	memset(&ct->proto, 0, sizeof(ct->proto));
 	if (cda[CTA_PROTOINFO]) {
@@ -1804,7 +1797,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 						      (1 << IPCT_ASSURED) |
 						      (1 << IPCT_HELPER) |
 						      (1 << IPCT_PROTOINFO) |
-						      (1 << IPCT_NATSEQADJ) |
+						      (1 << IPCT_SEQADJ) |
 						      (1 << IPCT_MARK) | events,
 						      ct, NETLINK_CB(skb).portid,
 						      nlmsg_report(nlh));
@@ -1827,7 +1820,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 						      (1 << IPCT_HELPER) |
 						      (1 << IPCT_LABEL) |
 						      (1 << IPCT_PROTOINFO) |
-						      (1 << IPCT_NATSEQADJ) |
+						      (1 << IPCT_SEQADJ) |
 						      (1 << IPCT_MARK),
 						      ct, NETLINK_CB(skb).portid,
 						      nlmsg_report(nlh));
@@ -1987,6 +1980,27 @@ out:
 	return err == -EAGAIN ? -ENOBUFS : err;
 }
 
+static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
+	[CTA_EXPECT_MASTER]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_TUPLE]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_MASK]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_TIMEOUT]	= { .type = NLA_U32 },
+	[CTA_EXPECT_ID]		= { .type = NLA_U32 },
+	[CTA_EXPECT_HELP_NAME]	= { .type = NLA_NUL_STRING,
+				    .len = NF_CT_HELPER_NAME_LEN - 1 },
+	[CTA_EXPECT_ZONE]	= { .type = NLA_U16 },
+	[CTA_EXPECT_FLAGS]	= { .type = NLA_U32 },
+	[CTA_EXPECT_CLASS]	= { .type = NLA_U32 },
+	[CTA_EXPECT_NAT]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_FN]		= { .type = NLA_NUL_STRING },
+};
+
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
+		       struct nf_conntrack_helper *helper,
+		       struct nf_conntrack_tuple *tuple,
+		       struct nf_conntrack_tuple *mask);
+
 #ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
 static size_t
 ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
@@ -2061,7 +2075,7 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
 		goto nla_put_failure;
 
 	if ((ct->status & IPS_SEQ_ADJUST) &&
-	    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+	    ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
 		goto nla_put_failure;
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
@@ -2116,7 +2130,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
 	struct nlattr *cda[CTA_MAX+1];
 	int ret;
 
-	nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
+	ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
+	if (ret < 0)
+		return ret;
 
 	spin_lock_bh(&nf_conntrack_lock);
 	ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
@@ -2125,10 +2141,70 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
 	return ret;
 }
 
+static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
+				       const struct nf_conn *ct,
+				       struct nf_conntrack_tuple *tuple,
+				       struct nf_conntrack_tuple *mask)
+{
+	int err;
+
+	err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
+				    nf_ct_l3num(ct));
+	if (err < 0)
+		return err;
+
+	return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
+				     nf_ct_l3num(ct));
+}
+
+static int
+ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
+				u32 portid, u32 report)
+{
+	struct nlattr *cda[CTA_EXPECT_MAX+1];
+	struct nf_conntrack_tuple tuple, mask;
+	struct nf_conntrack_helper *helper = NULL;
+	struct nf_conntrack_expect *exp;
+	int err;
+
+	err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
+	if (err < 0)
+		return err;
+
+	err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
+					  ct, &tuple, &mask);
+	if (err < 0)
+		return err;
+
+	if (cda[CTA_EXPECT_HELP_NAME]) {
+		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+						    nf_ct_protonum(ct));
+		if (helper == NULL)
+			return -EOPNOTSUPP;
+	}
+
+	exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
+				     helper, &tuple, &mask);
+	if (IS_ERR(exp))
+		return PTR_ERR(exp);
+
+	err = nf_ct_expect_related_report(exp, portid, report);
+	if (err < 0) {
+		nf_ct_expect_put(exp);
+		return err;
+	}
+
+	return 0;
+}
+
 static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
 	.build_size	= ctnetlink_nfqueue_build_size,
 	.build		= ctnetlink_nfqueue_build,
 	.parse		= ctnetlink_nfqueue_parse,
+	.attach_expect	= ctnetlink_nfqueue_attach_expect,
+	.seq_adjust	= nf_ct_tcp_seqadj_set,
 };
 #endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
 
@@ -2496,21 +2572,6 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
 	return err;
 }
 
-static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
-	[CTA_EXPECT_MASTER]	= { .type = NLA_NESTED },
-	[CTA_EXPECT_TUPLE]	= { .type = NLA_NESTED },
-	[CTA_EXPECT_MASK]	= { .type = NLA_NESTED },
-	[CTA_EXPECT_TIMEOUT]	= { .type = NLA_U32 },
-	[CTA_EXPECT_ID]		= { .type = NLA_U32 },
-	[CTA_EXPECT_HELP_NAME]	= { .type = NLA_NUL_STRING,
-				    .len = NF_CT_HELPER_NAME_LEN - 1 },
-	[CTA_EXPECT_ZONE]	= { .type = NLA_U16 },
-	[CTA_EXPECT_FLAGS]	= { .type = NLA_U32 },
-	[CTA_EXPECT_CLASS]	= { .type = NLA_U32 },
-	[CTA_EXPECT_NAT]	= { .type = NLA_NESTED },
-	[CTA_EXPECT_FN]		= { .type = NLA_NUL_STRING },
-};
-
 static int
 ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 		     const struct nlmsghdr *nlh,
@@ -2711,7 +2772,9 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
 	struct nf_conntrack_tuple nat_tuple = {};
 	int err;
 
-	nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy);
+	err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy);
+	if (err < 0)
+		return err;
 
 	if (!tb[CTA_EXPECT_NAT_DIR] || !tb[CTA_EXPECT_NAT_TUPLE])
 		return -EINVAL;
@@ -2731,76 +2794,26 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
 #endif
 }
 
-static int
-ctnetlink_create_expect(struct net *net, u16 zone,
-			const struct nlattr * const cda[],
-			u_int8_t u3,
-			u32 portid, int report)
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
+		       struct nf_conntrack_helper *helper,
+		       struct nf_conntrack_tuple *tuple,
+		       struct nf_conntrack_tuple *mask)
 {
-	struct nf_conntrack_tuple tuple, mask, master_tuple;
-	struct nf_conntrack_tuple_hash *h = NULL;
+	u_int32_t class = 0;
 	struct nf_conntrack_expect *exp;
-	struct nf_conn *ct;
 	struct nf_conn_help *help;
-	struct nf_conntrack_helper *helper = NULL;
-	u_int32_t class = 0;
-	int err = 0;
-
-	/* caller guarantees that those three CTA_EXPECT_* exist */
-	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
-	if (err < 0)
-		return err;
-	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
-	if (err < 0)
-		return err;
-	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
-	if (err < 0)
-		return err;
-
-	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(net, zone, &master_tuple);
-	if (!h)
-		return -ENOENT;
-	ct = nf_ct_tuplehash_to_ctrack(h);
-
-	/* Look for helper of this expectation */
-	if (cda[CTA_EXPECT_HELP_NAME]) {
-		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
-
-		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
-						    nf_ct_protonum(ct));
-		if (helper == NULL) {
-#ifdef CONFIG_MODULES
-			if (request_module("nfct-helper-%s", helpname) < 0) {
-				err = -EOPNOTSUPP;
-				goto out;
-			}
-
-			helper = __nf_conntrack_helper_find(helpname,
-							    nf_ct_l3num(ct),
-							    nf_ct_protonum(ct));
-			if (helper) {
-				err = -EAGAIN;
-				goto out;
-			}
-#endif
-			err = -EOPNOTSUPP;
-			goto out;
-		}
-	}
+	int err;
 
 	if (cda[CTA_EXPECT_CLASS] && helper) {
 		class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
-		if (class > helper->expect_class_max) {
-			err = -EINVAL;
-			goto out;
-		}
+		if (class > helper->expect_class_max)
+			return ERR_PTR(-EINVAL);
 	}
 	exp = nf_ct_expect_alloc(ct);
-	if (!exp) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!exp)
+		return ERR_PTR(-ENOMEM);
+
 	help = nfct_help(ct);
 	if (!help) {
 		if (!cda[CTA_EXPECT_TIMEOUT]) {
@@ -2838,21 +2851,89 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 	exp->class = class;
 	exp->master = ct;
 	exp->helper = helper;
-	memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
-	memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
-	exp->mask.src.u.all = mask.src.u.all;
+	exp->tuple = *tuple;
+	exp->mask.src.u3 = mask->src.u3;
+	exp->mask.src.u.all = mask->src.u.all;
 
 	if (cda[CTA_EXPECT_NAT]) {
 		err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT],
-						 exp, u3);
+						 exp, nf_ct_l3num(ct));
 		if (err < 0)
 			goto err_out;
 	}
-	err = nf_ct_expect_related_report(exp, portid, report);
+	return exp;
 err_out:
 	nf_ct_expect_put(exp);
-out:
-	nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
+	return ERR_PTR(err);
+}
+
+static int
+ctnetlink_create_expect(struct net *net, u16 zone,
+			const struct nlattr * const cda[],
+			u_int8_t u3, u32 portid, int report)
+{
+	struct nf_conntrack_tuple tuple, mask, master_tuple;
+	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nf_conntrack_helper *helper = NULL;
+	struct nf_conntrack_expect *exp;
+	struct nf_conn *ct;
+	int err;
+
+	/* caller guarantees that those three CTA_EXPECT_* exist */
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+	if (err < 0)
+		return err;
+
+	/* Look for master conntrack of this expectation */
+	h = nf_conntrack_find_get(net, zone, &master_tuple);
+	if (!h)
+		return -ENOENT;
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	if (cda[CTA_EXPECT_HELP_NAME]) {
+		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+		helper = __nf_conntrack_helper_find(helpname, u3,
+						    nf_ct_protonum(ct));
+		if (helper == NULL) {
+#ifdef CONFIG_MODULES
+			if (request_module("nfct-helper-%s", helpname) < 0) {
+				err = -EOPNOTSUPP;
+				goto err_ct;
+			}
+			helper = __nf_conntrack_helper_find(helpname, u3,
+							    nf_ct_protonum(ct));
+			if (helper) {
+				err = -EAGAIN;
+				goto err_ct;
+			}
+#endif
+			err = -EOPNOTSUPP;
+			goto err_ct;
+		}
+	}
+
+	exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
+	if (IS_ERR(exp)) {
+		err = PTR_ERR(exp);
+		goto err_ct;
+	}
+
+	err = nf_ct_expect_related_report(exp, portid, report);
+	if (err < 0)
+		goto err_exp;
+
+	return 0;
+err_exp:
+	nf_ct_expect_put(exp);
+err_ct:
+	nf_ct_put(ct);
 	return err;
 }
 
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 0ab9636..ce30041 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -281,7 +281,7 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
 	nf_ct_l3proto_unregister_sysctl(net, proto);
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(net, kill_l3proto, proto);
+	nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
 
@@ -476,7 +476,7 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 	nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
+	nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index f874fc5..891070e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -27,6 +27,8 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -495,21 +497,6 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 	}
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
-static inline s16 nat_offset(const struct nf_conn *ct,
-			     enum ip_conntrack_dir dir,
-			     u32 seq)
-{
-	typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
-
-	return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
-}
-#define NAT_OFFSET(ct, dir, seq) \
-	(nat_offset(ct, dir, seq))
-#else
-#define NAT_OFFSET(ct, dir, seq)	0
-#endif
-
 static bool tcp_in_window(const struct nf_conn *ct,
 			  struct ip_ct_tcp *state,
 			  enum ip_conntrack_dir dir,
@@ -525,8 +512,8 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 	__u32 seq, ack, sack, end, win, swin;
-	s16 receiver_offset;
-	bool res;
+	s32 receiver_offset;
+	bool res, in_recv_win;
 
 	/*
 	 * Get the required data from the packet.
@@ -540,7 +527,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		tcp_sack(skb, dataoff, tcph, &sack);
 
 	/* Take into account NAT sequence number mangling */
-	receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
+	receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
 	ack -= receiver_offset;
 	sack -= receiver_offset;
 
@@ -649,14 +636,18 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 		 receiver->td_scale);
 
+	/* Is the ending sequence in the receive window (if available)? */
+	in_recv_win = !receiver->td_maxwin ||
+		      after(end, sender->td_end - receiver->td_maxwin - 1);
+
 	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
 		 before(seq, sender->td_maxend + 1),
-		 after(end, sender->td_end - receiver->td_maxwin - 1),
+		 (in_recv_win ? 1 : 0),
 		 before(sack, receiver->td_end + 1),
 		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 
 	if (before(seq, sender->td_maxend + 1) &&
-	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
+	    in_recv_win &&
 	    before(sack, receiver->td_end + 1) &&
 	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 		/*
@@ -767,7 +758,7 @@ DONE:
 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 			"nf_ct_tcp: %s ",
 			before(seq, sender->td_maxend + 1) ?
-			after(end, sender->td_end - receiver->td_maxwin - 1) ?
+			in_recv_win ?
 			before(sack, receiver->td_end + 1) ?
 			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
 			: "ACK is under the lower bound (possible overly delayed ACK)"
@@ -998,6 +989,21 @@ static int tcp_packet(struct nf_conn *ct,
 				  "state %s ", tcp_conntrack_names[old_state]);
 		return NF_ACCEPT;
 	case TCP_CONNTRACK_MAX:
+		/* Special case for SYN proxy: when the SYN to the server or
+		 * the SYN/ACK from the server is lost, the client may transmit
+		 * a keep-alive packet while in SYN_SENT state. This needs to
+		 * be associated with the original conntrack entry in order to
+		 * generate a new SYN with the correct sequence number.
+		 */
+		if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
+		    index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
+		    ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
+		    ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
+			pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
+			spin_unlock_bh(&ct->lock);
+			return NF_ACCEPT;
+		}
+
 		/* Invalid packet */
 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
 			 dir, get_conntrack_index(th), old_state);
@@ -1085,6 +1091,12 @@ static int tcp_packet(struct nf_conn *ct,
 			nf_ct_kill_acct(ct, ctinfo, skb);
 			return NF_ACCEPT;
 		}
+		/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
+		 * pickup with loose=1. Avoid large ESTABLISHED timeout.
+		 */
+		if (new_state == TCP_CONNTRACK_ESTABLISHED &&
+		    timeout > timeouts[TCP_CONNTRACK_UNACK])
+			timeout = timeouts[TCP_CONNTRACK_UNACK];
 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
 		   && (old_state == TCP_CONNTRACK_SYN_RECV
 		       || old_state == TCP_CONNTRACK_ESTABLISHED)
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
new file mode 100644
index 0000000..17c1bcb
--- /dev/null
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -0,0 +1,238 @@
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+
+int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		      s32 off)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_conn_seqadj *seqadj;
+	struct nf_ct_seqadj *this_way;
+
+	if (off == 0)
+		return 0;
+
+	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+	seqadj = nfct_seqadj(ct);
+	this_way = &seqadj->seq[dir];
+	this_way->offset_before	 = off;
+	this_way->offset_after	 = off;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_init);
+
+int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		     __be32 seq, s32 off)
+{
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_seqadj *this_way;
+
+	if (off == 0)
+		return 0;
+
+	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+	spin_lock_bh(&ct->lock);
+	this_way = &seqadj->seq[dir];
+	if (this_way->offset_before == this_way->offset_after ||
+	    before(this_way->correction_pos, ntohl(seq))) {
+		this_way->correction_pos = ntohl(seq);
+		this_way->offset_before	 = this_way->offset_after;
+		this_way->offset_after	+= off;
+	}
+	spin_unlock_bh(&ct->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_set);
+
+void nf_ct_tcp_seqadj_set(struct sk_buff *skb,
+			  struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			  s32 off)
+{
+	const struct tcphdr *th;
+
+	if (nf_ct_protonum(ct) != IPPROTO_TCP)
+		return;
+
+	th = (struct tcphdr *)(skb_network_header(skb) + ip_hdrlen(skb));
+	nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
+}
+EXPORT_SYMBOL_GPL(nf_ct_tcp_seqadj_set);
+
+/* Adjust one found SACK option including checksum correction */
+static void nf_ct_sack_block_adjust(struct sk_buff *skb,
+				    struct tcphdr *tcph,
+				    unsigned int sackoff,
+				    unsigned int sackend,
+				    struct nf_ct_seqadj *seq)
+{
+	while (sackoff < sackend) {
+		struct tcp_sack_block_wire *sack;
+		__be32 new_start_seq, new_end_seq;
+
+		sack = (void *)skb->data + sackoff;
+		if (after(ntohl(sack->start_seq) - seq->offset_before,
+			  seq->correction_pos))
+			new_start_seq = htonl(ntohl(sack->start_seq) -
+					seq->offset_after);
+		else
+			new_start_seq = htonl(ntohl(sack->start_seq) -
+					seq->offset_before);
+
+		if (after(ntohl(sack->end_seq) - seq->offset_before,
+			  seq->correction_pos))
+			new_end_seq = htonl(ntohl(sack->end_seq) -
+				      seq->offset_after);
+		else
+			new_end_seq = htonl(ntohl(sack->end_seq) -
+				      seq->offset_before);
+
+		pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+			 ntohl(sack->start_seq), new_start_seq,
+			 ntohl(sack->end_seq), new_end_seq);
+
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->start_seq, new_start_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->end_seq, new_end_seq, 0);
+		sack->start_seq = new_start_seq;
+		sack->end_seq = new_end_seq;
+		sackoff += sizeof(*sack);
+	}
+}
+
+/* TCP SACK sequence number adjustment */
+static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
+				      unsigned int protoff,
+				      struct tcphdr *tcph,
+				      struct nf_conn *ct,
+				      enum ip_conntrack_info ctinfo)
+{
+	unsigned int dir, optoff, optend;
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+
+	optoff = protoff + sizeof(struct tcphdr);
+	optend = protoff + tcph->doff * 4;
+
+	if (!skb_make_writable(skb, optend))
+		return 0;
+
+	dir = CTINFO2DIR(ctinfo);
+
+	while (optoff < optend) {
+		/* Usually: option, length. */
+		unsigned char *op = skb->data + optoff;
+
+		switch (op[0]) {
+		case TCPOPT_EOL:
+			return 1;
+		case TCPOPT_NOP:
+			optoff++;
+			continue;
+		default:
+			/* no partial options */
+			if (optoff + 1 == optend ||
+			    optoff + op[1] > optend ||
+			    op[1] < 2)
+				return 0;
+			if (op[0] == TCPOPT_SACK &&
+			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
+			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+				nf_ct_sack_block_adjust(skb, tcph, optoff + 2,
+							optoff+op[1],
+							&seqadj->seq[!dir]);
+			optoff += op[1];
+		}
+	}
+	return 1;
+}
+
+/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
+int nf_ct_seq_adjust(struct sk_buff *skb,
+		     struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		     unsigned int protoff)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct tcphdr *tcph;
+	__be32 newseq, newack;
+	s32 seqoff, ackoff;
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	struct nf_ct_seqadj *this_way, *other_way;
+	int res;
+
+	this_way  = &seqadj->seq[dir];
+	other_way = &seqadj->seq[!dir];
+
+	if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
+		return 0;
+
+	tcph = (void *)skb->data + protoff;
+	spin_lock_bh(&ct->lock);
+	if (after(ntohl(tcph->seq), this_way->correction_pos))
+		seqoff = this_way->offset_after;
+	else
+		seqoff = this_way->offset_before;
+
+	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+		  other_way->correction_pos))
+		ackoff = other_way->offset_after;
+	else
+		ackoff = other_way->offset_before;
+
+	newseq = htonl(ntohl(tcph->seq) + seqoff);
+	newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+
+	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+		 ntohl(newack));
+
+	tcph->seq = newseq;
+	tcph->ack_seq = newack;
+
+	res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+	spin_unlock_bh(&ct->lock);
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_adjust);
+
+s32 nf_ct_seq_offset(const struct nf_conn *ct,
+		     enum ip_conntrack_dir dir,
+		     u32 seq)
+{
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	struct nf_ct_seqadj *this_way;
+
+	if (!seqadj)
+		return 0;
+
+	this_way = &seqadj->seq[dir];
+	return after(seq, this_way->correction_pos) ?
+		 this_way->offset_after : this_way->offset_before;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
+
+static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = {
+	.len	= sizeof(struct nf_conn_seqadj),
+	.align	= __alignof__(struct nf_conn_seqadj),
+	.id	= NF_CT_EXT_SEQADJ,
+};
+
+int nf_conntrack_seqadj_init(void)
+{
+	return nf_ct_extend_register(&nf_ct_seqadj_extend);
+}
+
+void nf_conntrack_seqadj_fini(void)
+{
+	nf_ct_extend_unregister(&nf_ct_seqadj_extend);
+}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index e0c4373..466410e 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -52,66 +52,8 @@ module_param(sip_direct_media, int, 0600);
 MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
 				   "endpoints only (default 1)");
 
-unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff,
-				unsigned int dataoff, const char **dptr,
-				unsigned int *datalen) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
-
-void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff,
-				   s16 off) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
-
-unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
-				       unsigned int protoff,
-				       unsigned int dataoff,
-				       const char **dptr,
-				       unsigned int *datalen,
-				       struct nf_conntrack_expect *exp,
-				       unsigned int matchoff,
-				       unsigned int matchlen) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
-
-unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff,
-				     unsigned int dataoff,
-				     const char **dptr,
-				     unsigned int *datalen,
-				     unsigned int sdpoff,
-				     enum sdp_header_types type,
-				     enum sdp_header_types term,
-				     const union nf_inet_addr *addr)
-				     __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
-
-unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff,
-				     unsigned int dataoff,
-				     const char **dptr,
-				     unsigned int *datalen,
-				     unsigned int matchoff,
-				     unsigned int matchlen,
-				     u_int16_t port) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
-
-unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
-					unsigned int protoff,
-					unsigned int dataoff,
-					const char **dptr,
-					unsigned int *datalen,
-					unsigned int sdpoff,
-					const union nf_inet_addr *addr)
-					__read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
-
-unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff,
-				      unsigned int dataoff,
-				      const char **dptr,
-				      unsigned int *datalen,
-				      struct nf_conntrack_expect *rtp_exp,
-				      struct nf_conntrack_expect *rtcp_exp,
-				      unsigned int mediaoff,
-				      unsigned int medialen,
-				      union nf_inet_addr *rtp_addr)
-				      __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook);
+const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
+EXPORT_SYMBOL_GPL(nf_nat_sip_hooks);
 
 static int string_len(const struct nf_conn *ct, const char *dptr,
 		      const char *limit, int *shift)
@@ -914,8 +856,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 	int direct_rtp = 0, skip_expect = 0, ret = NF_DROP;
 	u_int16_t base_port;
 	__be16 rtp_port, rtcp_port;
-	typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port;
-	typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media;
+	const struct nf_nat_sip_hooks *hooks;
 
 	saddr = NULL;
 	if (sip_direct_media) {
@@ -966,22 +907,23 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 #endif
 			skip_expect = 1;
 	} while (!skip_expect);
-	rcu_read_unlock();
 
 	base_port = ntohs(tuple.dst.u.udp.port) & ~1;
 	rtp_port = htons(base_port);
 	rtcp_port = htons(base_port + 1);
 
 	if (direct_rtp) {
-		nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
-		if (nf_nat_sdp_port &&
-		    !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
+		hooks = rcu_dereference(nf_nat_sip_hooks);
+		if (hooks &&
+		    !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen,
 				     mediaoff, medialen, ntohs(rtp_port)))
 			goto err1;
 	}
 
-	if (skip_expect)
+	if (skip_expect) {
+		rcu_read_unlock();
 		return NF_ACCEPT;
+	}
 
 	rtp_exp = nf_ct_expect_alloc(ct);
 	if (rtp_exp == NULL)
@@ -995,10 +937,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 	nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr,
 			  IPPROTO_UDP, NULL, &rtcp_port);
 
-	nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
-	if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
-		ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen,
-				       rtp_exp, rtcp_exp,
+	hooks = rcu_dereference(nf_nat_sip_hooks);
+	if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp)
+		ret = hooks->sdp_media(skb, protoff, dataoff, dptr,
+				       datalen, rtp_exp, rtcp_exp,
 				       mediaoff, medialen, daddr);
 	else {
 		if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -1012,6 +954,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 err2:
 	nf_ct_expect_put(rtp_exp);
 err1:
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -1051,13 +994,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 	unsigned int caddr_len, maddr_len;
 	unsigned int i;
 	union nf_inet_addr caddr, maddr, rtp_addr;
+	const struct nf_nat_sip_hooks *hooks;
 	unsigned int port;
 	const struct sdp_media_type *t;
 	int ret = NF_ACCEPT;
-	typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr;
-	typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session;
 
-	nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook);
+	hooks = rcu_dereference(nf_nat_sip_hooks);
 
 	/* Find beginning of session description */
 	if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
@@ -1125,10 +1067,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 		}
 
 		/* Update media connection address if present */
-		if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
-			ret = nf_nat_sdp_addr(skb, protoff, dataoff,
+		if (maddr_len && hooks && ct->status & IPS_NAT_MASK) {
+			ret = hooks->sdp_addr(skb, protoff, dataoff,
 					      dptr, datalen, mediaoff,
-					      SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
+					      SDP_HDR_CONNECTION,
+					      SDP_HDR_MEDIA,
 					      &rtp_addr);
 			if (ret != NF_ACCEPT) {
 				nf_ct_helper_log(skb, ct, "cannot mangle SDP");
@@ -1139,10 +1082,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 	}
 
 	/* Update session connection and owner addresses */
-	nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
-	if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sdp_session(skb, protoff, dataoff,
-					 dptr, datalen, sdpoff, &rtp_addr);
+	hooks = rcu_dereference(nf_nat_sip_hooks);
+	if (hooks && ct->status & IPS_NAT_MASK)
+		ret = hooks->sdp_session(skb, protoff, dataoff,
+					 dptr, datalen, sdpoff,
+					 &rtp_addr);
 
 	return ret;
 }
@@ -1242,11 +1186,11 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
 	unsigned int matchoff, matchlen;
 	struct nf_conntrack_expect *exp;
 	union nf_inet_addr *saddr, daddr;
+	const struct nf_nat_sip_hooks *hooks;
 	__be16 port;
 	u8 proto;
 	unsigned int expires = 0;
 	int ret;
-	typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect;
 
 	/* Expected connections can not register again. */
 	if (ct->status & IPS_EXPECTED)
@@ -1309,10 +1253,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
 	exp->helper = nfct_help(ct)->helper;
 	exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
 
-	nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
-	if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen,
-					exp, matchoff, matchlen);
+	hooks = rcu_dereference(nf_nat_sip_hooks);
+	if (hooks && ct->status & IPS_NAT_MASK)
+		ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
+				    exp, matchoff, matchlen);
 	else {
 		if (nf_ct_expect_related(exp) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
@@ -1515,7 +1459,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
 			   unsigned int protoff, unsigned int dataoff,
 			   const char **dptr, unsigned int *datalen)
 {
-	typeof(nf_nat_sip_hook) nf_nat_sip;
+	const struct nf_nat_sip_hooks *hooks;
 	int ret;
 
 	if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
@@ -1524,9 +1468,9 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
 		ret = process_sip_response(skb, protoff, dataoff, dptr, datalen);
 
 	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
-		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
-		if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff,
-					      dptr, datalen)) {
+		hooks = rcu_dereference(nf_nat_sip_hooks);
+		if (hooks && !hooks->msg(skb, protoff, dataoff,
+					 dptr, datalen)) {
 			nf_ct_helper_log(skb, ct, "cannot NAT SIP message");
 			ret = NF_DROP;
 		}
@@ -1546,7 +1490,6 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 	s16 diff, tdiff = 0;
 	int ret = NF_ACCEPT;
 	bool term;
-	typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
 
 	if (ctinfo != IP_CT_ESTABLISHED &&
 	    ctinfo != IP_CT_ESTABLISHED_REPLY)
@@ -1610,9 +1553,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 	}
 
 	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
-		nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook);
-		if (nf_nat_sip_seq_adjust)
-			nf_nat_sip_seq_adjust(skb, protoff, tdiff);
+		const struct nf_nat_sip_hooks *hooks;
+
+		hooks = rcu_dereference(nf_nat_sip_hooks);
+		if (hooks)
+			hooks->seq_adjust(skb, protoff, tdiff);
 	}
 
 	return ret;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index bd700b4..f641751 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -408,7 +408,7 @@ static int log_invalid_proto_max = 255;
 
 static struct ctl_table_header *nf_ct_netfilter_header;
 
-static ctl_table nf_ct_sysctl_table[] = {
+static struct ctl_table nf_ct_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_max",
 		.data		= &nf_conntrack_max,
@@ -458,7 +458,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 
 #define NET_NF_CONNTRACK_MAX 2089
 
-static ctl_table nf_ct_netfilter_table[] = {
+static struct ctl_table nf_ct_netfilter_table[] = {
 	{
 		.procname	= "nf_conntrack_max",
 		.data		= &nf_conntrack_max,
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 3deec99..61a3c92 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -13,26 +13,20 @@
 
 
 /* core.c */
-extern unsigned int nf_iterate(struct list_head *head,
-				struct sk_buff *skb,
-				unsigned int hook,
-				const struct net_device *indev,
-				const struct net_device *outdev,
-				struct nf_hook_ops **elemp,
-				int (*okfn)(struct sk_buff *),
-				int hook_thresh);
+unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
+			unsigned int hook, const struct net_device *indev,
+			const struct net_device *outdev,
+			struct nf_hook_ops **elemp,
+			int (*okfn)(struct sk_buff *), int hook_thresh);
 
 /* nf_queue.c */
-extern int nf_queue(struct sk_buff *skb,
-		    struct nf_hook_ops *elem,
-		    u_int8_t pf, unsigned int hook,
-		    struct net_device *indev,
-		    struct net_device *outdev,
-		    int (*okfn)(struct sk_buff *),
-		    unsigned int queuenum);
-extern int __init netfilter_queue_init(void);
+int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf,
+	     unsigned int hook, struct net_device *indev,
+	     struct net_device *outdev, int (*okfn)(struct sk_buff *),
+	     unsigned int queuenum);
+int __init netfilter_queue_init(void);
 
 /* nf_log.c */
-extern int __init netfilter_log_init(void);
+int __init netfilter_log_init(void);
 
 #endif
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 3b18dd1..85296d4 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -245,7 +245,7 @@ static const struct file_operations nflog_file_ops = {
 static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
 static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
 
-static int nf_log_proc_dostring(ctl_table *table, int write,
+static int nf_log_proc_dostring(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	const struct nf_logger *logger;
@@ -369,9 +369,7 @@ static int __net_init nf_log_net_init(struct net *net)
 
 out_sysctl:
 #ifdef CONFIG_PROC_FS
-	/* For init_net: errors will trigger panic, don't unroll on error. */
-	if (!net_eq(net, &init_net))
-		remove_proc_entry("nf_log", net->nf.proc_netfilter);
+	remove_proc_entry("nf_log", net->nf.proc_netfilter);
 #endif
 	return ret;
 }
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 038eee5..63a8154 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -25,6 +25,7 @@
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_nat.h>
@@ -402,6 +403,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 			ct->status |= IPS_SRC_NAT;
 		else
 			ct->status |= IPS_DST_NAT;
+
+		if (nfct_help(ct))
+			nfct_seqadj_ext_add(ct);
 	}
 
 	if (maniptype == NF_NAT_MANIP_SRC) {
@@ -428,6 +432,26 @@ nf_nat_setup_info(struct nf_conn *ct,
 }
 EXPORT_SYMBOL(nf_nat_setup_info);
 
+unsigned int
+nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+	/* Force range to this IP; let proto decide mapping for
+	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+	 * Use reply in case it's already been mangled (eg local packet).
+	 */
+	union nf_inet_addr ip =
+		(HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
+		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
+	struct nf_nat_range range = {
+		.flags		= NF_NAT_RANGE_MAP_IPS,
+		.min_addr	= ip,
+		.max_addr	= ip,
+	};
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
+
 /* Do packet manipulations according to nf_nat_setup_info. */
 unsigned int nf_nat_packet(struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
@@ -497,7 +521,7 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
 
 	rtnl_lock();
 	for_each_net(net)
-		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
 	rtnl_unlock();
 }
 
@@ -511,7 +535,7 @@ static void nf_nat_l3proto_clean(u8 l3proto)
 	rtnl_lock();
 
 	for_each_net(net)
-		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
 	rtnl_unlock();
 }
 
@@ -749,7 +773,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 {
 	struct nf_nat_proto_clean clean = {};
 
-	nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean);
+	nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
 	synchronize_rcu();
 	nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
 }
@@ -764,10 +788,6 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
 	.expectfn	= nf_nat_follow_master,
 };
 
-static struct nfq_ct_nat_hook nfq_ct_nat = {
-	.seq_adjust	= nf_nat_tcp_seq_adjust,
-};
-
 static int __init nf_nat_init(void)
 {
 	int ret;
@@ -787,14 +807,9 @@ static int __init nf_nat_init(void)
 	/* Initialize fake conntrack so that NAT will skip it */
 	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
 
-	BUG_ON(nf_nat_seq_adjust_hook != NULL);
-	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
 	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
 	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
 			   nfnetlink_parse_nat_setup);
-	BUG_ON(nf_ct_nat_offset != NULL);
-	RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
-	RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
 #ifdef CONFIG_XFRM
 	BUG_ON(nf_nat_decode_session_hook != NULL);
 	RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
@@ -813,10 +828,7 @@ static void __exit nf_nat_cleanup(void)
 	unregister_pernet_subsys(&nf_nat_net_ops);
 	nf_ct_extend_unregister(&nat_extend);
 	nf_ct_helper_expectfn_unregister(&follow_master_nat);
-	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
 	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
-	RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
-	RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
 #ifdef CONFIG_XFRM
 	RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
 #endif
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 5fea563..2840abb 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -20,74 +20,13 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_l3proto.h>
 #include <net/netfilter/nf_nat_l4proto.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 
-#define DUMP_OFFSET(x) \
-	pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
-		 x->offset_before, x->offset_after, x->correction_pos);
-
-static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
-		    int sizediff,
-		    struct nf_conn *ct,
-		    enum ip_conntrack_info ctinfo)
-{
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_seq *this_way = &nat->seq[dir];
-
-	pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
-		 seq, sizediff);
-
-	pr_debug("adjust_tcp_sequence: Seq_offset before: ");
-	DUMP_OFFSET(this_way);
-
-	spin_lock_bh(&nf_nat_seqofs_lock);
-
-	/* SYN adjust. If it's uninitialized, or this is after last
-	 * correction, record it: we don't handle more than one
-	 * adjustment in the window, but do deal with common case of a
-	 * retransmit */
-	if (this_way->offset_before == this_way->offset_after ||
-	    before(this_way->correction_pos, seq)) {
-		this_way->correction_pos = seq;
-		this_way->offset_before = this_way->offset_after;
-		this_way->offset_after += sizediff;
-	}
-	spin_unlock_bh(&nf_nat_seqofs_lock);
-
-	pr_debug("adjust_tcp_sequence: Seq_offset after: ");
-	DUMP_OFFSET(this_way);
-}
-
-/* Get the offset value, for conntrack */
-s16 nf_nat_get_offset(const struct nf_conn *ct,
-		      enum ip_conntrack_dir dir,
-		      u32 seq)
-{
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_seq *this_way;
-	s16 offset;
-
-	if (!nat)
-		return 0;
-
-	this_way = &nat->seq[dir];
-	spin_lock_bh(&nf_nat_seqofs_lock);
-	offset = after(seq, this_way->correction_pos)
-		 ? this_way->offset_after : this_way->offset_before;
-	spin_unlock_bh(&nf_nat_seqofs_lock);
-
-	return offset;
-}
-
 /* Frobs data inside this packet, which is linear. */
 static void mangle_contents(struct sk_buff *skb,
 			    unsigned int dataoff,
@@ -104,7 +43,7 @@ static void mangle_contents(struct sk_buff *skb,
 	/* move post-replacement */
 	memmove(data + match_offset + rep_len,
 		data + match_offset + match_len,
-		skb->tail - (skb->network_header + dataoff +
+		skb_tail_pointer(skb) - (skb_network_header(skb) + dataoff +
 			     match_offset + match_len));
 
 	/* insert data from buffer */
@@ -142,30 +81,6 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
 	return 1;
 }
 
-void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
-			   __be32 seq, s16 off)
-{
-	if (!off)
-		return;
-	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
-	adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
-	nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
-}
-EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
-
-void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
-			   u32 ctinfo, int off)
-{
-	const struct tcphdr *th;
-
-	if (nf_ct_protonum(ct) != IPPROTO_TCP)
-		return;
-
-	th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
-	nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
-}
-EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
-
 /* Generic function for mangling variable-length address changes inside
  * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
  * command in FTP).
@@ -210,8 +125,8 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 			     datalen, oldlen);
 
 	if (adjust && rep_len != match_len)
-		nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
-				      (int)rep_len - (int)match_len);
+		nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
+				 (int)rep_len - (int)match_len);
 
 	return 1;
 }
@@ -271,145 +186,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
 
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
-	    struct tcphdr *tcph,
-	    unsigned int sackoff,
-	    unsigned int sackend,
-	    struct nf_nat_seq *natseq)
-{
-	while (sackoff < sackend) {
-		struct tcp_sack_block_wire *sack;
-		__be32 new_start_seq, new_end_seq;
-
-		sack = (void *)skb->data + sackoff;
-		if (after(ntohl(sack->start_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_after);
-		else
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_before);
-
-		if (after(ntohl(sack->end_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_after);
-		else
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_before);
-
-		pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
-			 ntohl(sack->start_seq), new_start_seq,
-			 ntohl(sack->end_seq), new_end_seq);
-
-		inet_proto_csum_replace4(&tcph->check, skb,
-					 sack->start_seq, new_start_seq, 0);
-		inet_proto_csum_replace4(&tcph->check, skb,
-					 sack->end_seq, new_end_seq, 0);
-		sack->start_seq = new_start_seq;
-		sack->end_seq = new_end_seq;
-		sackoff += sizeof(*sack);
-	}
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff *skb,
-		   unsigned int protoff,
-		   struct tcphdr *tcph,
-		   struct nf_conn *ct,
-		   enum ip_conntrack_info ctinfo)
-{
-	unsigned int dir, optoff, optend;
-	struct nf_conn_nat *nat = nfct_nat(ct);
-
-	optoff = protoff + sizeof(struct tcphdr);
-	optend = protoff + tcph->doff * 4;
-
-	if (!skb_make_writable(skb, optend))
-		return 0;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	while (optoff < optend) {
-		/* Usually: option, length. */
-		unsigned char *op = skb->data + optoff;
-
-		switch (op[0]) {
-		case TCPOPT_EOL:
-			return 1;
-		case TCPOPT_NOP:
-			optoff++;
-			continue;
-		default:
-			/* no partial options */
-			if (optoff + 1 == optend ||
-			    optoff + op[1] > optend ||
-			    op[1] < 2)
-				return 0;
-			if (op[0] == TCPOPT_SACK &&
-			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
-			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
-				sack_adjust(skb, tcph, optoff+2,
-					    optoff+op[1], &nat->seq[!dir]);
-			optoff += op[1];
-		}
-	}
-	return 1;
-}
-
-/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
-int
-nf_nat_seq_adjust(struct sk_buff *skb,
-		  struct nf_conn *ct,
-		  enum ip_conntrack_info ctinfo,
-		  unsigned int protoff)
-{
-	struct tcphdr *tcph;
-	int dir;
-	__be32 newseq, newack;
-	s16 seqoff, ackoff;
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_seq *this_way, *other_way;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	this_way = &nat->seq[dir];
-	other_way = &nat->seq[!dir];
-
-	if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
-		return 0;
-
-	tcph = (void *)skb->data + protoff;
-	if (after(ntohl(tcph->seq), this_way->correction_pos))
-		seqoff = this_way->offset_after;
-	else
-		seqoff = this_way->offset_before;
-
-	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
-		  other_way->correction_pos))
-		ackoff = other_way->offset_after;
-	else
-		ackoff = other_way->offset_before;
-
-	newseq = htonl(ntohl(tcph->seq) + seqoff);
-	newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
-	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
-	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
-
-	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-		 ntohl(newack));
-
-	tcph->seq = newseq;
-	tcph->ack_seq = newack;
-
-	return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
-}
-
 /* Setup NAT on this expected conntrack so it follows master. */
 /* If we fail to get a free NAT slot, we'll get dropped on confirm */
 void nf_nat_follow_master(struct nf_conn *ct,
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 396e55d..754536f 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -34,9 +34,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 	       const struct nf_conntrack_tuple *tuple,
 	       enum nf_nat_manip_type maniptype)
 {
-	struct sk_buff *frag;
 	sctp_sctphdr_t *hdr;
-	__u32 crc32;
 
 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
 		return false;
@@ -51,11 +49,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 		hdr->dest = tuple->dst.u.sctp.port;
 	}
 
-	crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
-	skb_walk_frags(skb, frag)
-		crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
-					  crc32);
-	hdr->checksum = sctp_end_cksum(crc32);
+	hdr->checksum = sctp_compute_cksum(skb, hdroff);
 
 	return true;
 }
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index dac11f7..b4d691d 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -20,6 +20,7 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <linux/netfilter/nf_conntrack_sip.h>
 
 MODULE_LICENSE("GPL");
@@ -308,7 +309,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
 		return;
 
 	th = (struct tcphdr *)(skb->data + protoff);
-	nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
+	nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
 }
 
 /* Handles expected signalling connections and media streams */
@@ -624,33 +625,26 @@ static struct nf_ct_helper_expectfn sip_nat = {
 
 static void __exit nf_nat_sip_fini(void)
 {
-	RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
+	RCU_INIT_POINTER(nf_nat_sip_hooks, NULL);
+
 	nf_ct_helper_expectfn_unregister(&sip_nat);
 	synchronize_rcu();
 }
 
+static const struct nf_nat_sip_hooks sip_hooks = {
+	.msg		= nf_nat_sip,
+	.seq_adjust	= nf_nat_sip_seq_adjust,
+	.expect		= nf_nat_sip_expect,
+	.sdp_addr	= nf_nat_sdp_addr,
+	.sdp_port	= nf_nat_sdp_port,
+	.sdp_session	= nf_nat_sdp_session,
+	.sdp_media	= nf_nat_sdp_media,
+};
+
 static int __init nf_nat_sip_init(void)
 {
-	BUG_ON(nf_nat_sip_hook != NULL);
-	BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
-	BUG_ON(nf_nat_sip_expect_hook != NULL);
-	BUG_ON(nf_nat_sdp_addr_hook != NULL);
-	BUG_ON(nf_nat_sdp_port_hook != NULL);
-	BUG_ON(nf_nat_sdp_session_hook != NULL);
-	BUG_ON(nf_nat_sdp_media_hook != NULL);
-	RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip);
-	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust);
-	RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect);
-	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr);
-	RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port);
-	RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session);
-	RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media);
+	BUG_ON(nf_nat_sip_hooks != NULL);
+	RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks);
 	nf_ct_helper_expectfn_register(&sip_nat);
 	return 0;
 }
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
new file mode 100644
index 0000000..9858e3e
--- /dev/null
+++ b/net/netfilter/nf_synproxy_core.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <asm/unaligned.h>
+#include <net/tcp.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+int synproxy_net_id;
+EXPORT_SYMBOL_GPL(synproxy_net_id);
+
+bool
+synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+		       const struct tcphdr *th, struct synproxy_options *opts)
+{
+	int length = (th->doff * 4) - sizeof(*th);
+	u8 buf[40], *ptr;
+
+	ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
+	if (ptr == NULL)
+		return false;
+
+	opts->options = 0;
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return true;
+		case TCPOPT_NOP:
+			length--;
+			continue;
+		default:
+			opsize = *ptr++;
+			if (opsize < 2)
+				return true;
+			if (opsize > length)
+				return true;
+
+			switch (opcode) {
+			case TCPOPT_MSS:
+				if (opsize == TCPOLEN_MSS) {
+					opts->mss = get_unaligned_be16(ptr);
+					opts->options |= XT_SYNPROXY_OPT_MSS;
+				}
+				break;
+			case TCPOPT_WINDOW:
+				if (opsize == TCPOLEN_WINDOW) {
+					opts->wscale = *ptr;
+					if (opts->wscale > 14)
+						opts->wscale = 14;
+					opts->options |= XT_SYNPROXY_OPT_WSCALE;
+				}
+				break;
+			case TCPOPT_TIMESTAMP:
+				if (opsize == TCPOLEN_TIMESTAMP) {
+					opts->tsval = get_unaligned_be32(ptr);
+					opts->tsecr = get_unaligned_be32(ptr + 4);
+					opts->options |= XT_SYNPROXY_OPT_TIMESTAMP;
+				}
+				break;
+			case TCPOPT_SACK_PERM:
+				if (opsize == TCPOLEN_SACK_PERM)
+					opts->options |= XT_SYNPROXY_OPT_SACK_PERM;
+				break;
+			}
+
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+	return true;
+}
+EXPORT_SYMBOL_GPL(synproxy_parse_options);
+
+unsigned int synproxy_options_size(const struct synproxy_options *opts)
+{
+	unsigned int size = 0;
+
+	if (opts->options & XT_SYNPROXY_OPT_MSS)
+		size += TCPOLEN_MSS_ALIGNED;
+	if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+		size += TCPOLEN_TSTAMP_ALIGNED;
+	else if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
+		size += TCPOLEN_SACKPERM_ALIGNED;
+	if (opts->options & XT_SYNPROXY_OPT_WSCALE)
+		size += TCPOLEN_WSCALE_ALIGNED;
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(synproxy_options_size);
+
+void
+synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
+{
+	__be32 *ptr = (__be32 *)(th + 1);
+	u8 options = opts->options;
+
+	if (options & XT_SYNPROXY_OPT_MSS)
+		*ptr++ = htonl((TCPOPT_MSS << 24) |
+			       (TCPOLEN_MSS << 16) |
+			       opts->mss);
+
+	if (options & XT_SYNPROXY_OPT_TIMESTAMP) {
+		if (options & XT_SYNPROXY_OPT_SACK_PERM)
+			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
+				       (TCPOLEN_SACK_PERM << 16) |
+				       (TCPOPT_TIMESTAMP << 8) |
+				       TCPOLEN_TIMESTAMP);
+		else
+			*ptr++ = htonl((TCPOPT_NOP << 24) |
+				       (TCPOPT_NOP << 16) |
+				       (TCPOPT_TIMESTAMP << 8) |
+				       TCPOLEN_TIMESTAMP);
+
+		*ptr++ = htonl(opts->tsval);
+		*ptr++ = htonl(opts->tsecr);
+	} else if (options & XT_SYNPROXY_OPT_SACK_PERM)
+		*ptr++ = htonl((TCPOPT_NOP << 24) |
+			       (TCPOPT_NOP << 16) |
+			       (TCPOPT_SACK_PERM << 8) |
+			       TCPOLEN_SACK_PERM);
+
+	if (options & XT_SYNPROXY_OPT_WSCALE)
+		*ptr++ = htonl((TCPOPT_NOP << 24) |
+			       (TCPOPT_WINDOW << 16) |
+			       (TCPOLEN_WINDOW << 8) |
+			       opts->wscale);
+}
+EXPORT_SYMBOL_GPL(synproxy_build_options);
+
+void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
+				    struct synproxy_options *opts)
+{
+	opts->tsecr = opts->tsval;
+	opts->tsval = tcp_time_stamp & ~0x3f;
+
+	if (opts->options & XT_SYNPROXY_OPT_WSCALE) {
+		opts->tsval |= opts->wscale;
+		opts->wscale = info->wscale;
+	} else
+		opts->tsval |= 0xf;
+
+	if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
+		opts->tsval |= 1 << 4;
+
+	if (opts->options & XT_SYNPROXY_OPT_ECN)
+		opts->tsval |= 1 << 5;
+}
+EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie);
+
+void synproxy_check_timestamp_cookie(struct synproxy_options *opts)
+{
+	opts->wscale = opts->tsecr & 0xf;
+	if (opts->wscale != 0xf)
+		opts->options |= XT_SYNPROXY_OPT_WSCALE;
+
+	opts->options |= opts->tsecr & (1 << 4) ? XT_SYNPROXY_OPT_SACK_PERM : 0;
+
+	opts->options |= opts->tsecr & (1 << 5) ? XT_SYNPROXY_OPT_ECN : 0;
+}
+EXPORT_SYMBOL_GPL(synproxy_check_timestamp_cookie);
+
+unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
+				    unsigned int protoff,
+				    struct tcphdr *th,
+				    struct nf_conn *ct,
+				    enum ip_conntrack_info ctinfo,
+				    const struct nf_conn_synproxy *synproxy)
+{
+	unsigned int optoff, optend;
+	u32 *ptr, old;
+
+	if (synproxy->tsoff == 0)
+		return 1;
+
+	optoff = protoff + sizeof(struct tcphdr);
+	optend = protoff + th->doff * 4;
+
+	if (!skb_make_writable(skb, optend))
+		return 0;
+
+	while (optoff < optend) {
+		unsigned char *op = skb->data + optoff;
+
+		switch (op[0]) {
+		case TCPOPT_EOL:
+			return 1;
+		case TCPOPT_NOP:
+			optoff++;
+			continue;
+		default:
+			if (optoff + 1 == optend ||
+			    optoff + op[1] > optend ||
+			    op[1] < 2)
+				return 0;
+			if (op[0] == TCPOPT_TIMESTAMP &&
+			    op[1] == TCPOLEN_TIMESTAMP) {
+				if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+					ptr = (u32 *)&op[2];
+					old = *ptr;
+					*ptr = htonl(ntohl(*ptr) -
+						     synproxy->tsoff);
+				} else {
+					ptr = (u32 *)&op[6];
+					old = *ptr;
+					*ptr = htonl(ntohl(*ptr) +
+						     synproxy->tsoff);
+				}
+				inet_proto_csum_replace4(&th->check, skb,
+							 old, *ptr, 0);
+				return 1;
+			}
+			optoff += op[1];
+		}
+	}
+	return 1;
+}
+EXPORT_SYMBOL_GPL(synproxy_tstamp_adjust);
+
+static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = {
+	.len		= sizeof(struct nf_conn_synproxy),
+	.align		= __alignof__(struct nf_conn_synproxy),
+	.id		= NF_CT_EXT_SYNPROXY,
+};
+
+#ifdef CONFIG_PROC_FS
+static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (cpu = *pos - 1; cpu < nr_cpu_ids; cpu++) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu + 1;
+		return per_cpu_ptr(snet->stats, cpu);
+	}
+
+	return NULL;
+}
+
+static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
+	int cpu;
+
+	for (cpu = *pos; cpu < nr_cpu_ids; cpu++) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu + 1;
+		return per_cpu_ptr(snet->stats, cpu);
+	}
+
+	return NULL;
+}
+
+static void synproxy_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+	return;
+}
+
+static int synproxy_cpu_seq_show(struct seq_file *seq, void *v)
+{
+	struct synproxy_stats *stats = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "entries\t\tsyn_received\t"
+				"cookie_invalid\tcookie_valid\t"
+				"cookie_retrans\tconn_reopened\n");
+		return 0;
+	}
+
+	seq_printf(seq, "%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n", 0,
+		   stats->syn_received,
+		   stats->cookie_invalid,
+		   stats->cookie_valid,
+		   stats->cookie_retrans,
+		   stats->conn_reopened);
+
+	return 0;
+}
+
+static const struct seq_operations synproxy_cpu_seq_ops = {
+	.start		= synproxy_cpu_seq_start,
+	.next		= synproxy_cpu_seq_next,
+	.stop		= synproxy_cpu_seq_stop,
+	.show		= synproxy_cpu_seq_show,
+};
+
+static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &synproxy_cpu_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations synproxy_cpu_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= synproxy_cpu_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
+};
+
+static int __net_init synproxy_proc_init(struct net *net)
+{
+	if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat,
+			 &synproxy_cpu_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+static void __net_exit synproxy_proc_exit(struct net *net)
+{
+	remove_proc_entry("synproxy", net->proc_net_stat);
+}
+#else
+static int __net_init synproxy_proc_init(struct net *net)
+{
+	return 0;
+}
+
+static void __net_exit synproxy_proc_exit(struct net *net)
+{
+	return;
+}
+#endif /* CONFIG_PROC_FS */
+
+static int __net_init synproxy_net_init(struct net *net)
+{
+	struct synproxy_net *snet = synproxy_pernet(net);
+	struct nf_conntrack_tuple t;
+	struct nf_conn *ct;
+	int err = -ENOMEM;
+
+	memset(&t, 0, sizeof(t));
+	ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
+	if (IS_ERR(ct)) {
+		err = PTR_ERR(ct);
+		goto err1;
+	}
+
+	if (!nfct_seqadj_ext_add(ct))
+		goto err2;
+	if (!nfct_synproxy_ext_add(ct))
+		goto err2;
+	__set_bit(IPS_TEMPLATE_BIT, &ct->status);
+	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
+	snet->tmpl = ct;
+
+	snet->stats = alloc_percpu(struct synproxy_stats);
+	if (snet->stats == NULL)
+		goto err2;
+
+	err = synproxy_proc_init(net);
+	if (err < 0)
+		goto err3;
+
+	return 0;
+
+err3:
+	free_percpu(snet->stats);
+err2:
+	nf_conntrack_free(ct);
+err1:
+	return err;
+}
+
+static void __net_exit synproxy_net_exit(struct net *net)
+{
+	struct synproxy_net *snet = synproxy_pernet(net);
+
+	nf_conntrack_free(snet->tmpl);
+	synproxy_proc_exit(net);
+	free_percpu(snet->stats);
+}
+
+static struct pernet_operations synproxy_net_ops = {
+	.init		= synproxy_net_init,
+	.exit		= synproxy_net_exit,
+	.id		= &synproxy_net_id,
+	.size		= sizeof(struct synproxy_net),
+};
+
+static int __init synproxy_core_init(void)
+{
+	int err;
+
+	err = nf_ct_extend_register(&nf_ct_synproxy_extend);
+	if (err < 0)
+		goto err1;
+
+	err = register_pernet_subsys(&synproxy_net_ops);
+	if (err < 0)
+		goto err2;
+
+	return 0;
+
+err2:
+	nf_ct_extend_unregister(&nf_ct_synproxy_extend);
+err1:
+	return err;
+}
+
+static void __exit synproxy_core_exit(void)
+{
+	unregister_pernet_subsys(&synproxy_net_ops);
+	nf_ct_extend_unregister(&nf_ct_synproxy_extend);
+}
+
+module_init(synproxy_core_init);
+module_exit(synproxy_core_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
new file mode 100644
index 0000000..dcddc49
--- /dev/null
+++ b/net/netfilter/nf_tables_api.c
@@ -0,0 +1,3275 @@
+/*
+ * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+static LIST_HEAD(nf_tables_expressions);
+
+/**
+ *	nft_register_afinfo - register nf_tables address family info
+ *
+ *	@afi: address family info to register
+ *
+ *	Register the address family for use with nf_tables. Returns zero on
+ *	success or a negative errno code otherwise.
+ */
+int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
+{
+	INIT_LIST_HEAD(&afi->tables);
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&afi->list, &net->nft.af_info);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_afinfo);
+
+/**
+ *	nft_unregister_afinfo - unregister nf_tables address family info
+ *
+ *	@afi: address family info to unregister
+ *
+ *	Unregister the address family for use with nf_tables.
+ */
+void nft_unregister_afinfo(struct nft_af_info *afi)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&afi->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
+
+static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family)
+{
+	struct nft_af_info *afi;
+
+	list_for_each_entry(afi, &net->nft.af_info, list) {
+		if (afi->family == family)
+			return afi;
+	}
+	return NULL;
+}
+
+static struct nft_af_info *
+nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
+{
+	struct nft_af_info *afi;
+
+	afi = nft_afinfo_lookup(net, family);
+	if (afi != NULL)
+		return afi;
+#ifdef CONFIG_MODULES
+	if (autoload) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-afinfo-%u", family);
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		afi = nft_afinfo_lookup(net, family);
+		if (afi != NULL)
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-EAFNOSUPPORT);
+}
+
+/*
+ * Tables
+ */
+
+static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+					  const struct nlattr *nla)
+{
+	struct nft_table *table;
+
+	list_for_each_entry(table, &afi->tables, list) {
+		if (!nla_strcmp(nla, table->name))
+			return table;
+	}
+	return NULL;
+}
+
+static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+						const struct nlattr *nla)
+{
+	struct nft_table *table;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	table = nft_table_lookup(afi, nla);
+	if (table != NULL)
+		return table;
+
+	return ERR_PTR(-ENOENT);
+}
+
+static inline u64 nf_tables_alloc_handle(struct nft_table *table)
+{
+	return ++table->hgenerator;
+}
+
+static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX];
+
+static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
+{
+	int i;
+
+	for (i=0; i<NFT_CHAIN_T_MAX; i++) {
+		if (chain_type[family][i] != NULL &&
+		    !nla_strcmp(nla, chain_type[family][i]->name))
+			return i;
+	}
+	return -1;
+}
+
+static int nf_tables_chain_type_lookup(const struct nft_af_info *afi,
+				       const struct nlattr *nla,
+				       bool autoload)
+{
+	int type;
+
+	type = __nf_tables_chain_type_lookup(afi->family, nla);
+#ifdef CONFIG_MODULES
+	if (type < 0 && autoload) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-chain-%u-%*.s", afi->family,
+			       nla_len(nla)-1, (const char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		type = __nf_tables_chain_type_lookup(afi->family, nla);
+	}
+#endif
+	return type;
+}
+
+static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
+	[NFTA_TABLE_NAME]	= { .type = NLA_STRING },
+	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
+};
+
+static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
+				     int event, u32 flags, int family,
+				     const struct nft_table *table)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
+	    nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)))
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_table_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  int event, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	bool report;
+	int err;
+
+	report = nlh ? nlmsg_report(nlh) : false;
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
+					family, table);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static int nf_tables_dump_tables(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &net->nft.af_info, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			if (idx < s_idx)
+				goto cont;
+			if (idx > s_idx)
+				memset(&cb->args[1], 0,
+				       sizeof(cb->args) - sizeof(cb->args[0]));
+			if (nf_tables_fill_table_info(skb,
+						      NETLINK_CB(cb->skb).portid,
+						      cb->nlh->nlmsg_seq,
+						      NFT_MSG_NEWTABLE,
+						      NLM_F_MULTI,
+						      afi->family, table) < 0)
+				goto done;
+cont:
+			idx++;
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	struct sk_buff *skb2;
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_tables,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+					nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
+					family, table);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static int nf_tables_table_enable(struct nft_table *table)
+{
+	struct nft_chain *chain;
+	int err, i = 0;
+
+	list_for_each_entry(chain, &table->chains, list) {
+		err = nf_register_hook(&nft_base_chain(chain)->ops);
+		if (err < 0)
+			goto err;
+
+		i++;
+	}
+	return 0;
+err:
+	list_for_each_entry(chain, &table->chains, list) {
+		if (i-- <= 0)
+			break;
+
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+	}
+	return err;
+}
+
+static int nf_tables_table_disable(struct nft_table *table)
+{
+	struct nft_chain *chain;
+
+	list_for_each_entry(chain, &table->chains, list)
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+	return 0;
+}
+
+static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[],
+			      struct nft_af_info *afi, struct nft_table *table)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	int family = nfmsg->nfgen_family, ret = 0;
+
+	if (nla[NFTA_TABLE_FLAGS]) {
+		__be32 flags;
+
+		flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
+		if (flags & ~NFT_TABLE_F_DORMANT)
+			return -EINVAL;
+
+		if ((flags & NFT_TABLE_F_DORMANT) &&
+		    !(table->flags & NFT_TABLE_F_DORMANT)) {
+			ret = nf_tables_table_disable(table);
+			if (ret >= 0)
+				table->flags |= NFT_TABLE_F_DORMANT;
+		} else if (!(flags & NFT_TABLE_F_DORMANT) &&
+			   table->flags & NFT_TABLE_F_DORMANT) {
+			ret = nf_tables_table_enable(table);
+			if (ret >= 0)
+				table->flags &= ~NFT_TABLE_F_DORMANT;
+		}
+		if (ret < 0)
+			goto err;
+	}
+
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+err:
+	return ret;
+}
+
+static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr *name;
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(net, family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	name = nla[NFTA_TABLE_NAME];
+	table = nf_tables_table_lookup(afi, name);
+	if (IS_ERR(table)) {
+		if (PTR_ERR(table) != -ENOENT)
+			return PTR_ERR(table);
+		table = NULL;
+	}
+
+	if (table != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+		return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
+	}
+
+	table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+	if (table == NULL)
+		return -ENOMEM;
+
+	nla_strlcpy(table->name, name, nla_len(name));
+	INIT_LIST_HEAD(&table->chains);
+	INIT_LIST_HEAD(&table->sets);
+
+	if (nla[NFTA_TABLE_FLAGS]) {
+		__be32 flags;
+
+		flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
+		if (flags & ~NFT_TABLE_F_DORMANT) {
+			kfree(table);
+			return -EINVAL;
+		}
+
+		table->flags |= flags;
+	}
+
+	list_add_tail(&table->list, &afi->tables);
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+	return 0;
+}
+
+static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if (table->use)
+		return -EBUSY;
+
+	list_del(&table->list);
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
+	kfree(table);
+	return 0;
+}
+
+int nft_register_chain_type(struct nf_chain_type *ctype)
+{
+	int err = 0;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	if (chain_type[ctype->family][ctype->type] != NULL) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	if (!try_module_get(ctype->me))
+		goto out;
+
+	chain_type[ctype->family][ctype->type] = ctype;
+out:
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nft_register_chain_type);
+
+void nft_unregister_chain_type(struct nf_chain_type *ctype)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	chain_type[ctype->family][ctype->type] = NULL;
+	module_put(ctype->me);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
+
+/*
+ * Chains
+ */
+
+static struct nft_chain *
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+{
+	struct nft_chain *chain;
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (chain->handle == handle)
+			return chain;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
+						const struct nlattr *nla)
+{
+	struct nft_chain *chain;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (!nla_strcmp(nla, chain->name))
+			return chain;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
+	[NFTA_CHAIN_TABLE]	= { .type = NLA_STRING },
+	[NFTA_CHAIN_HANDLE]	= { .type = NLA_U64 },
+	[NFTA_CHAIN_NAME]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+	[NFTA_CHAIN_HOOK]	= { .type = NLA_NESTED },
+	[NFTA_CHAIN_POLICY]	= { .type = NLA_U32 },
+	[NFTA_CHAIN_TYPE]	= { .type = NLA_NUL_STRING },
+	[NFTA_CHAIN_COUNTERS]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
+	[NFTA_HOOK_HOOKNUM]	= { .type = NLA_U32 },
+	[NFTA_HOOK_PRIORITY]	= { .type = NLA_U32 },
+};
+
+static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
+{
+	struct nft_stats *cpu_stats, total;
+	struct nlattr *nest;
+	int cpu;
+
+	memset(&total, 0, sizeof(total));
+	for_each_possible_cpu(cpu) {
+		cpu_stats = per_cpu_ptr(stats, cpu);
+		total.pkts += cpu_stats->pkts;
+		total.bytes += cpu_stats->bytes;
+	}
+	nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) ||
+	    nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -ENOSPC;
+}
+
+static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
+				     int event, u32 flags, int family,
+				     const struct nft_table *table,
+				     const struct nft_chain *chain)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle)))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
+		goto nla_put_failure;
+
+	if (chain->flags & NFT_BASE_CHAIN) {
+		const struct nft_base_chain *basechain = nft_base_chain(chain);
+		const struct nf_hook_ops *ops = &basechain->ops;
+		struct nlattr *nest;
+
+		nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
+		if (nest == NULL)
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+			goto nla_put_failure;
+		nla_nest_end(skb, nest);
+
+		if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
+				 htonl(basechain->policy)))
+			goto nla_put_failure;
+
+		if (nla_put_string(skb, NFTA_CHAIN_TYPE,
+			chain_type[ops->pf][nft_base_chain(chain)->type]->name))
+				goto nla_put_failure;
+
+		if (nft_dump_stats(skb, nft_base_chain(chain)->stats))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  const struct nft_chain *chain,
+				  int event, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	bool report;
+	int err;
+
+	report = nlh ? nlmsg_report(nlh) : false;
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
+					table, chain);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static int nf_tables_dump_chains(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &net->nft.af_info, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			list_for_each_entry(chain, &table->chains, list) {
+				if (idx < s_idx)
+					goto cont;
+				if (idx > s_idx)
+					memset(&cb->args[1], 0,
+					       sizeof(cb->args) - sizeof(cb->args[0]));
+				if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+							      cb->nlh->nlmsg_seq,
+							      NFT_MSG_NEWCHAIN,
+							      NLM_F_MULTI,
+							      afi->family, table, chain) < 0)
+					goto done;
+cont:
+				idx++;
+			}
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+
+static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	struct sk_buff *skb2;
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_chains,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+					nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
+					family, table, chain);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static int
+nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr)
+{
+	switch (ntohl(nla_get_be32(attr))) {
+	case NF_DROP:
+		chain->policy = NF_DROP;
+		break;
+	case NF_ACCEPT:
+		chain->policy = NF_ACCEPT;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+	[NFTA_COUNTER_PACKETS]	= { .type = NLA_U64 },
+	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
+};
+
+static int
+nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
+{
+	struct nlattr *tb[NFTA_COUNTER_MAX+1];
+	struct nft_stats __percpu *newstats;
+	struct nft_stats *stats;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
+		return -EINVAL;
+
+	newstats = alloc_percpu(struct nft_stats);
+	if (newstats == NULL)
+		return -ENOMEM;
+
+	/* Restore old counters on this cpu, no problem. Per-cpu statistics
+	 * are not exposed to userspace.
+	 */
+	stats = this_cpu_ptr(newstats);
+	stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+	stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+
+	if (chain->stats) {
+		/* nfnl_lock is held, add some nfnl function for this, later */
+		struct nft_stats __percpu *oldstats =
+			rcu_dereference_protected(chain->stats, 1);
+
+		rcu_assign_pointer(chain->stats, newstats);
+		synchronize_rcu();
+		free_percpu(oldstats);
+	} else
+		rcu_assign_pointer(chain->stats, newstats);
+
+	return 0;
+}
+
+static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr * uninitialized_var(name);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_base_chain *basechain = NULL;
+	struct nlattr *ha[NFTA_HOOK_MAX + 1];
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+	u64 handle = 0;
+	int err;
+	bool create;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(net, family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if (table->use == UINT_MAX)
+		return -EOVERFLOW;
+
+	chain = NULL;
+	name = nla[NFTA_CHAIN_NAME];
+
+	if (nla[NFTA_CHAIN_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+		chain = nf_tables_chain_lookup_byhandle(table, handle);
+		if (IS_ERR(chain))
+			return PTR_ERR(chain);
+	} else {
+		chain = nf_tables_chain_lookup(table, name);
+		if (IS_ERR(chain)) {
+			if (PTR_ERR(chain) != -ENOENT)
+				return PTR_ERR(chain);
+			chain = NULL;
+		}
+	}
+
+	if (chain != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+
+		if (nla[NFTA_CHAIN_HANDLE] && name &&
+		    !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
+			return -EEXIST;
+
+		if (nla[NFTA_CHAIN_POLICY]) {
+			if (!(chain->flags & NFT_BASE_CHAIN))
+				return -EOPNOTSUPP;
+
+			err = nf_tables_chain_policy(nft_base_chain(chain),
+						     nla[NFTA_CHAIN_POLICY]);
+			if (err < 0)
+				return err;
+		}
+
+		if (nla[NFTA_CHAIN_COUNTERS]) {
+			if (!(chain->flags & NFT_BASE_CHAIN))
+				return -EOPNOTSUPP;
+
+			err = nf_tables_counters(nft_base_chain(chain),
+						 nla[NFTA_CHAIN_COUNTERS]);
+			if (err < 0)
+				return err;
+		}
+
+		if (nla[NFTA_CHAIN_HANDLE] && name)
+			nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+		goto notify;
+	}
+
+	if (nla[NFTA_CHAIN_HOOK]) {
+		struct nf_hook_ops *ops;
+		nf_hookfn *hookfn;
+		u32 hooknum;
+		int type = NFT_CHAIN_T_DEFAULT;
+
+		if (nla[NFTA_CHAIN_TYPE]) {
+			type = nf_tables_chain_type_lookup(afi,
+							   nla[NFTA_CHAIN_TYPE],
+							   create);
+			if (type < 0)
+				return -ENOENT;
+		}
+
+		err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+				       nft_hook_policy);
+		if (err < 0)
+			return err;
+		if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+		    ha[NFTA_HOOK_PRIORITY] == NULL)
+			return -EINVAL;
+
+		hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+		if (hooknum >= afi->nhooks)
+			return -EINVAL;
+
+		hookfn = chain_type[family][type]->fn[hooknum];
+		if (hookfn == NULL)
+			return -EOPNOTSUPP;
+
+		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+		if (basechain == NULL)
+			return -ENOMEM;
+
+		basechain->type = type;
+		chain = &basechain->chain;
+
+		ops = &basechain->ops;
+		ops->pf		= family;
+		ops->owner	= afi->owner;
+		ops->hooknum	= ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+		ops->priority	= ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+		ops->priv	= chain;
+		ops->hook       = hookfn;
+		if (afi->hooks[ops->hooknum])
+			ops->hook = afi->hooks[ops->hooknum];
+
+		chain->flags |= NFT_BASE_CHAIN;
+
+		if (nla[NFTA_CHAIN_POLICY]) {
+			err = nf_tables_chain_policy(basechain,
+						     nla[NFTA_CHAIN_POLICY]);
+			if (err < 0) {
+				free_percpu(basechain->stats);
+				kfree(basechain);
+				return err;
+			}
+		} else
+			basechain->policy = NF_ACCEPT;
+
+		if (nla[NFTA_CHAIN_COUNTERS]) {
+			err = nf_tables_counters(basechain,
+						 nla[NFTA_CHAIN_COUNTERS]);
+			if (err < 0) {
+				free_percpu(basechain->stats);
+				kfree(basechain);
+				return err;
+			}
+		} else {
+			struct nft_stats __percpu *newstats;
+
+			newstats = alloc_percpu(struct nft_stats);
+			if (newstats == NULL)
+				return -ENOMEM;
+
+			rcu_assign_pointer(nft_base_chain(chain)->stats,
+					   newstats);
+		}
+	} else {
+		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+		if (chain == NULL)
+			return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&chain->rules);
+	chain->handle = nf_tables_alloc_handle(table);
+	chain->net = net;
+	chain->table = table;
+	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+	    chain->flags & NFT_BASE_CHAIN) {
+		err = nf_register_hook(&nft_base_chain(chain)->ops);
+		if (err < 0) {
+			free_percpu(basechain->stats);
+			kfree(basechain);
+			return err;
+		}
+	}
+	list_add_tail(&chain->list, &table->chains);
+	table->use++;
+notify:
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
+			       family);
+	return 0;
+}
+
+static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+{
+	struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
+
+	BUG_ON(chain->use > 0);
+
+	if (chain->flags & NFT_BASE_CHAIN) {
+		free_percpu(nft_base_chain(chain)->stats);
+		kfree(nft_base_chain(chain));
+	} else
+		kfree(chain);
+}
+
+static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	if (!list_empty(&chain->rules))
+		return -EBUSY;
+
+	list_del(&chain->list);
+	table->use--;
+
+	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+	    chain->flags & NFT_BASE_CHAIN)
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
+			       family);
+
+	/* Make sure all rule references are gone before this is released */
+	call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+	return 0;
+}
+
+static void nft_ctx_init(struct nft_ctx *ctx,
+			 const struct sk_buff *skb,
+			 const struct nlmsghdr *nlh,
+			 const struct nft_af_info *afi,
+			 const struct nft_table *table,
+			 const struct nft_chain *chain,
+			 const struct nlattr * const *nla)
+{
+	ctx->net   = sock_net(skb->sk);
+	ctx->skb   = skb;
+	ctx->nlh   = nlh;
+	ctx->afi   = afi;
+	ctx->table = table;
+	ctx->chain = chain;
+	ctx->nla   = nla;
+}
+
+/*
+ * Expressions
+ */
+
+/**
+ *	nft_register_expr - register nf_tables expr type
+ *	@ops: expr type
+ *
+ *	Registers the expr type for use with nf_tables. Returns zero on
+ *	success or a negative errno code otherwise.
+ */
+int nft_register_expr(struct nft_expr_type *type)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&type->list, &nf_tables_expressions);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_expr);
+
+/**
+ *	nft_unregister_expr - unregister nf_tables expr type
+ *	@ops: expr type
+ *
+ * 	Unregisters the expr typefor use with nf_tables.
+ */
+void nft_unregister_expr(struct nft_expr_type *type)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&type->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_expr);
+
+static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla)
+{
+	const struct nft_expr_type *type;
+
+	list_for_each_entry(type, &nf_tables_expressions, list) {
+		if (!nla_strcmp(nla, type->name))
+			return type;
+	}
+	return NULL;
+}
+
+static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla)
+{
+	const struct nft_expr_type *type;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	type = __nft_expr_type_get(nla);
+	if (type != NULL && try_module_get(type->owner))
+		return type;
+
+#ifdef CONFIG_MODULES
+	if (type == NULL) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-expr-%.*s",
+			       nla_len(nla), (char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (__nft_expr_type_get(nla))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
+	[NFTA_EXPR_NAME]	= { .type = NLA_STRING },
+	[NFTA_EXPR_DATA]	= { .type = NLA_NESTED },
+};
+
+static int nf_tables_fill_expr_info(struct sk_buff *skb,
+				    const struct nft_expr *expr)
+{
+	if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name))
+		goto nla_put_failure;
+
+	if (expr->ops->dump) {
+		struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
+		if (data == NULL)
+			goto nla_put_failure;
+		if (expr->ops->dump(skb, expr) < 0)
+			goto nla_put_failure;
+		nla_nest_end(skb, data);
+	}
+
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+};
+
+struct nft_expr_info {
+	const struct nft_expr_ops	*ops;
+	struct nlattr			*tb[NFT_EXPR_MAXATTR + 1];
+};
+
+static int nf_tables_expr_parse(const struct nft_ctx *ctx,
+				const struct nlattr *nla,
+				struct nft_expr_info *info)
+{
+	const struct nft_expr_type *type;
+	const struct nft_expr_ops *ops;
+	struct nlattr *tb[NFTA_EXPR_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
+	if (err < 0)
+		return err;
+
+	type = nft_expr_type_get(tb[NFTA_EXPR_NAME]);
+	if (IS_ERR(type))
+		return PTR_ERR(type);
+
+	if (tb[NFTA_EXPR_DATA]) {
+		err = nla_parse_nested(info->tb, type->maxattr,
+				       tb[NFTA_EXPR_DATA], type->policy);
+		if (err < 0)
+			goto err1;
+	} else
+		memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1));
+
+	if (type->select_ops != NULL) {
+		ops = type->select_ops(ctx,
+				       (const struct nlattr * const *)info->tb);
+		if (IS_ERR(ops)) {
+			err = PTR_ERR(ops);
+			goto err1;
+		}
+	} else
+		ops = type->ops;
+
+	info->ops = ops;
+	return 0;
+
+err1:
+	module_put(type->owner);
+	return err;
+}
+
+static int nf_tables_newexpr(const struct nft_ctx *ctx,
+			     const struct nft_expr_info *info,
+			     struct nft_expr *expr)
+{
+	const struct nft_expr_ops *ops = info->ops;
+	int err;
+
+	expr->ops = ops;
+	if (ops->init) {
+		err = ops->init(ctx, expr, (const struct nlattr **)info->tb);
+		if (err < 0)
+			goto err1;
+	}
+
+	return 0;
+
+err1:
+	expr->ops = NULL;
+	return err;
+}
+
+static void nf_tables_expr_destroy(struct nft_expr *expr)
+{
+	if (expr->ops->destroy)
+		expr->ops->destroy(expr);
+	module_put(expr->ops->type->owner);
+}
+
+/*
+ * Rules
+ */
+
+static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
+						u64 handle)
+{
+	struct nft_rule *rule;
+
+	// FIXME: this sucks
+	list_for_each_entry(rule, &chain->rules, list) {
+		if (handle == rule->handle)
+			return rule;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
+					      const struct nlattr *nla)
+{
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+}
+
+static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
+	[NFTA_RULE_TABLE]	= { .type = NLA_STRING },
+	[NFTA_RULE_CHAIN]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+	[NFTA_RULE_HANDLE]	= { .type = NLA_U64 },
+	[NFTA_RULE_EXPRESSIONS]	= { .type = NLA_NESTED },
+	[NFTA_RULE_COMPAT]	= { .type = NLA_NESTED },
+	[NFTA_RULE_POSITION]	= { .type = NLA_U64 },
+};
+
+static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
+				    int event, u32 flags, int family,
+				    const struct nft_table *table,
+				    const struct nft_chain *chain,
+				    const struct nft_rule *rule)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	const struct nft_expr *expr, *next;
+	struct nlattr *list;
+	const struct nft_rule *prule;
+	int type = event | NFNL_SUBSYS_NFTABLES << 8;
+
+	nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg),
+			flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle)))
+		goto nla_put_failure;
+
+	if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) {
+		prule = list_entry(rule->list.prev, struct nft_rule, list);
+		if (nla_put_be64(skb, NFTA_RULE_POSITION,
+				 cpu_to_be64(prule->handle)))
+			goto nla_put_failure;
+	}
+
+	list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS);
+	if (list == NULL)
+		goto nla_put_failure;
+	nft_rule_for_each_expr(expr, next, rule) {
+		struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM);
+		if (elem == NULL)
+			goto nla_put_failure;
+		if (nf_tables_fill_expr_info(skb, expr) < 0)
+			goto nla_put_failure;
+		nla_nest_end(skb, elem);
+	}
+	nla_nest_end(skb, list);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_rule_notify(const struct sk_buff *oskb,
+				 const struct nlmsghdr *nlh,
+				 const struct nft_table *table,
+				 const struct nft_chain *chain,
+				 const struct nft_rule *rule,
+				 int event, u32 flags, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = NETLINK_CB(oskb).portid;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	u32 seq = nlh->nlmsg_seq;
+	bool report;
+	int err;
+
+	report = nlmsg_report(nlh);
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
+				       family, table, chain, rule);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static inline bool
+nft_rule_is_active(struct net *net, const struct nft_rule *rule)
+{
+	return (rule->genmask & (1 << net->nft.gencursor)) == 0;
+}
+
+static inline int gencursor_next(struct net *net)
+{
+	return net->nft.gencursor+1 == 1 ? 1 : 0;
+}
+
+static inline int
+nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
+{
+	return (rule->genmask & (1 << gencursor_next(net))) == 0;
+}
+
+static inline void
+nft_rule_activate_next(struct net *net, struct nft_rule *rule)
+{
+	/* Now inactive, will be active in the future */
+	rule->genmask = (1 << net->nft.gencursor);
+}
+
+static inline void
+nft_rule_disactivate_next(struct net *net, struct nft_rule *rule)
+{
+	rule->genmask = (1 << gencursor_next(net));
+}
+
+static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
+{
+	rule->genmask = 0;
+}
+
+static int nf_tables_dump_rules(struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	const struct nft_rule *rule;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+	u8 genctr = ACCESS_ONCE(net->nft.genctr);
+	u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
+
+	list_for_each_entry(afi, &net->nft.af_info, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			list_for_each_entry(chain, &table->chains, list) {
+				list_for_each_entry(rule, &chain->rules, list) {
+					if (!nft_rule_is_active(net, rule))
+						goto cont;
+					if (idx < s_idx)
+						goto cont;
+					if (idx > s_idx)
+						memset(&cb->args[1], 0,
+						       sizeof(cb->args) - sizeof(cb->args[0]));
+					if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+								      cb->nlh->nlmsg_seq,
+								      NFT_MSG_NEWRULE,
+								      NLM_F_MULTI | NLM_F_APPEND,
+								      afi->family, table, chain, rule) < 0)
+						goto done;
+cont:
+					idx++;
+				}
+			}
+		}
+	}
+done:
+	/* Invalidate this dump, a transition to the new generation happened */
+	if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
+		return -EBUSY;
+
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	const struct nft_rule *rule;
+	struct sk_buff *skb2;
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_rules,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+	if (IS_ERR(rule))
+		return PTR_ERR(rule);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+				       nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
+				       family, table, chain, rule);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
+{
+	struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
+	struct nft_expr *expr;
+
+	/*
+	 * Careful: some expressions might not be initialized in case this
+	 * is called on error from nf_tables_newrule().
+	 */
+	expr = nft_expr_first(rule);
+	while (expr->ops && expr != nft_expr_last(rule)) {
+		nf_tables_expr_destroy(expr);
+		expr = nft_expr_next(expr);
+	}
+	kfree(rule);
+}
+
+static void nf_tables_rule_destroy(struct nft_rule *rule)
+{
+	call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy);
+}
+
+#define NFT_RULE_MAXEXPRS	128
+
+static struct nft_expr_info *info;
+
+static struct nft_rule_trans *
+nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
+{
+	struct nft_rule_trans *rupd;
+
+	rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
+	if (rupd == NULL)
+	       return NULL;
+
+	rupd->chain = ctx->chain;
+	rupd->table = ctx->table;
+	rupd->rule = rule;
+	rupd->family = ctx->afi->family;
+	rupd->nlh = ctx->nlh;
+	list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
+
+	return rupd;
+}
+
+static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	struct net *net = sock_net(skb->sk);
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_rule *rule, *old_rule = NULL;
+	struct nft_rule_trans *repl = NULL;
+	struct nft_expr *expr;
+	struct nft_ctx ctx;
+	struct nlattr *tmp;
+	unsigned int size, i, n;
+	int err, rem;
+	bool create;
+	u64 handle, pos_handle;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	if (nla[NFTA_RULE_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
+		rule = __nf_tables_rule_lookup(chain, handle);
+		if (IS_ERR(rule))
+			return PTR_ERR(rule);
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			old_rule = rule;
+		else
+			return -EOPNOTSUPP;
+	} else {
+		if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EINVAL;
+		handle = nf_tables_alloc_handle(table);
+	}
+
+	if (nla[NFTA_RULE_POSITION]) {
+		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+			return -EOPNOTSUPP;
+
+		pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+		old_rule = __nf_tables_rule_lookup(chain, pos_handle);
+		if (IS_ERR(old_rule))
+			return PTR_ERR(old_rule);
+	}
+
+	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+
+	n = 0;
+	size = 0;
+	if (nla[NFTA_RULE_EXPRESSIONS]) {
+		nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
+			err = -EINVAL;
+			if (nla_type(tmp) != NFTA_LIST_ELEM)
+				goto err1;
+			if (n == NFT_RULE_MAXEXPRS)
+				goto err1;
+			err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
+			if (err < 0)
+				goto err1;
+			size += info[n].ops->size;
+			n++;
+		}
+	}
+
+	err = -ENOMEM;
+	rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+	if (rule == NULL)
+		goto err1;
+
+	nft_rule_activate_next(net, rule);
+
+	rule->handle = handle;
+	rule->dlen   = size;
+
+	expr = nft_expr_first(rule);
+	for (i = 0; i < n; i++) {
+		err = nf_tables_newexpr(&ctx, &info[i], expr);
+		if (err < 0)
+			goto err2;
+		info[i].ops = NULL;
+		expr = nft_expr_next(expr);
+	}
+
+	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+		if (nft_rule_is_active_next(net, old_rule)) {
+			repl = nf_tables_trans_add(old_rule, &ctx);
+			if (repl == NULL) {
+				err = -ENOMEM;
+				goto err2;
+			}
+			nft_rule_disactivate_next(net, old_rule);
+			list_add_tail(&rule->list, &old_rule->list);
+		} else {
+			err = -ENOENT;
+			goto err2;
+		}
+	} else if (nlh->nlmsg_flags & NLM_F_APPEND)
+		if (old_rule)
+			list_add_rcu(&rule->list, &old_rule->list);
+		else
+			list_add_tail_rcu(&rule->list, &chain->rules);
+	else {
+		if (old_rule)
+			list_add_tail_rcu(&rule->list, &old_rule->list);
+		else
+			list_add_rcu(&rule->list, &chain->rules);
+	}
+
+	if (nf_tables_trans_add(rule, &ctx) == NULL) {
+		err = -ENOMEM;
+		goto err3;
+	}
+	return 0;
+
+err3:
+	list_del_rcu(&rule->list);
+	if (repl) {
+		list_del_rcu(&repl->rule->list);
+		list_del(&repl->list);
+		nft_rule_clear(net, repl->rule);
+		kfree(repl);
+	}
+err2:
+	nf_tables_rule_destroy(rule);
+err1:
+	for (i = 0; i < n; i++) {
+		if (info[i].ops != NULL)
+			module_put(info[i].ops->type->owner);
+	}
+	return err;
+}
+
+static int
+nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+	/* You cannot delete the same rule twice */
+	if (nft_rule_is_active_next(ctx->net, rule)) {
+		if (nf_tables_trans_add(rule, ctx) == NULL)
+			return -ENOMEM;
+		nft_rule_disactivate_next(ctx->net, rule);
+		return 0;
+	}
+	return -ENOENT;
+}
+
+static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	struct net *net = sock_net(skb->sk);
+	const struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_rule *rule, *tmp;
+	int family = nfmsg->nfgen_family, err = 0;
+	struct nft_ctx ctx;
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+
+	if (nla[NFTA_RULE_HANDLE]) {
+		rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+		if (IS_ERR(rule))
+			return PTR_ERR(rule);
+
+		err = nf_tables_delrule_one(&ctx, rule);
+	} else {
+		/* Remove all rules in this chain */
+		list_for_each_entry_safe(rule, tmp, &chain->rules, list) {
+			err = nf_tables_delrule_one(&ctx, rule);
+			if (err < 0)
+				break;
+		}
+	}
+
+	return err;
+}
+
+static int nf_tables_commit(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_rule_trans *rupd, *tmp;
+
+	/* Bump generation counter, invalidate any dump in progress */
+	net->nft.genctr++;
+
+	/* A new generation has just started */
+	net->nft.gencursor = gencursor_next(net);
+
+	/* Make sure all packets have left the previous generation before
+	 * purging old rules.
+	 */
+	synchronize_rcu();
+
+	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+		/* Delete this rule from the dirty list */
+		list_del(&rupd->list);
+
+		/* This rule was inactive in the past and just became active.
+		 * Clear the next bit of the genmask since its meaning has
+		 * changed, now it is the future.
+		 */
+		if (nft_rule_is_active(net, rupd->rule)) {
+			nft_rule_clear(net, rupd->rule);
+			nf_tables_rule_notify(skb, rupd->nlh, rupd->table,
+					      rupd->chain, rupd->rule,
+					      NFT_MSG_NEWRULE, 0,
+					      rupd->family);
+			kfree(rupd);
+			continue;
+		}
+
+		/* This rule is in the past, get rid of it */
+		list_del_rcu(&rupd->rule->list);
+		nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
+				      rupd->rule, NFT_MSG_DELRULE, 0,
+				      rupd->family);
+		nf_tables_rule_destroy(rupd->rule);
+		kfree(rupd);
+	}
+
+	return 0;
+}
+
+static int nf_tables_abort(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_rule_trans *rupd, *tmp;
+
+	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+		/* Delete all rules from the dirty list */
+		list_del(&rupd->list);
+
+		if (!nft_rule_is_active_next(net, rupd->rule)) {
+			nft_rule_clear(net, rupd->rule);
+			kfree(rupd);
+			continue;
+		}
+
+		/* This rule is inactive, get rid of it */
+		list_del_rcu(&rupd->rule->list);
+		nf_tables_rule_destroy(rupd->rule);
+		kfree(rupd);
+	}
+	return 0;
+}
+
+/*
+ * Sets
+ */
+
+static LIST_HEAD(nf_tables_set_ops);
+
+int nft_register_set(struct nft_set_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&ops->list, &nf_tables_set_ops);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_set);
+
+void nft_unregister_set(struct nft_set_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&ops->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_set);
+
+static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
+{
+	const struct nft_set_ops *ops;
+	u32 features;
+
+#ifdef CONFIG_MODULES
+	if (list_empty(&nf_tables_set_ops)) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-set");
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (!list_empty(&nf_tables_set_ops))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	features = 0;
+	if (nla[NFTA_SET_FLAGS] != NULL) {
+		features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
+		features &= NFT_SET_INTERVAL | NFT_SET_MAP;
+	}
+
+	// FIXME: implement selection properly
+	list_for_each_entry(ops, &nf_tables_set_ops, list) {
+		if ((ops->features & features) != features)
+			continue;
+		if (!try_module_get(ops->owner))
+			continue;
+		return ops;
+	}
+
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
+	[NFTA_SET_TABLE]		= { .type = NLA_STRING },
+	[NFTA_SET_NAME]			= { .type = NLA_STRING },
+	[NFTA_SET_FLAGS]		= { .type = NLA_U32 },
+	[NFTA_SET_KEY_TYPE]		= { .type = NLA_U32 },
+	[NFTA_SET_KEY_LEN]		= { .type = NLA_U32 },
+	[NFTA_SET_DATA_TYPE]		= { .type = NLA_U32 },
+	[NFTA_SET_DATA_LEN]		= { .type = NLA_U32 },
+};
+
+static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
+				     const struct sk_buff *skb,
+				     const struct nlmsghdr *nlh,
+				     const struct nlattr * const nla[])
+{
+	struct net *net = sock_net(skb->sk);
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table = NULL;
+
+	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	if (nla[NFTA_SET_TABLE] != NULL) {
+		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+		if (IS_ERR(table))
+			return PTR_ERR(table);
+	}
+
+	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+	return 0;
+}
+
+struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
+				     const struct nlattr *nla)
+{
+	struct nft_set *set;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	list_for_each_entry(set, &table->sets, list) {
+		if (!nla_strcmp(nla, set->name))
+			return set;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
+static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
+				    const char *name)
+{
+	const struct nft_set *i;
+	const char *p;
+	unsigned long *inuse;
+	unsigned int n = 0;
+
+	p = strnchr(name, IFNAMSIZ, '%');
+	if (p != NULL) {
+		if (p[1] != 'd' || strchr(p + 2, '%'))
+			return -EINVAL;
+
+		inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+		if (inuse == NULL)
+			return -ENOMEM;
+
+		list_for_each_entry(i, &ctx->table->sets, list) {
+			if (!sscanf(i->name, name, &n))
+				continue;
+			if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE)
+				continue;
+			set_bit(n, inuse);
+		}
+
+		n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE);
+		free_page((unsigned long)inuse);
+	}
+
+	snprintf(set->name, sizeof(set->name), name, n);
+	list_for_each_entry(i, &ctx->table->sets, list) {
+		if (!strcmp(set->name, i->name))
+			return -ENFILE;
+	}
+	return 0;
+}
+
+static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
+			      const struct nft_set *set, u16 event, u16 flags)
+{
+	struct nfgenmsg *nfmsg;
+	struct nlmsghdr *nlh;
+	u32 portid = NETLINK_CB(ctx->skb).portid;
+	u32 seq = ctx->nlh->nlmsg_seq;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+			flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= ctx->afi->family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+		goto nla_put_failure;
+	if (set->flags != 0)
+		if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
+			goto nla_put_failure;
+
+	if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen)))
+		goto nla_put_failure;
+	if (set->flags & NFT_SET_MAP) {
+		if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype)))
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen)))
+			goto nla_put_failure;
+	}
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_set_notify(const struct nft_ctx *ctx,
+				const struct nft_set *set,
+				int event)
+{
+	struct sk_buff *skb;
+	u32 portid = NETLINK_CB(ctx->skb).portid;
+	bool report;
+	int err;
+
+	report = nlmsg_report(ctx->nlh);
+	if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_set(skb, ctx, set, event, 0);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
+				     struct netlink_callback *cb)
+{
+	const struct nft_set *set;
+	unsigned int idx = 0, s_idx = cb->args[0];
+
+	if (cb->args[1])
+		return skb->len;
+
+	list_for_each_entry(set, &ctx->table->sets, list) {
+		if (idx < s_idx)
+			goto cont;
+		if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
+				       NLM_F_MULTI) < 0) {
+			cb->args[0] = idx;
+			goto done;
+		}
+cont:
+		idx++;
+	}
+	cb->args[1] = 1;
+done:
+	return skb->len;
+}
+
+static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	const struct nft_set *set;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
+
+	if (cb->args[1])
+		return skb->len;
+
+	list_for_each_entry(table, &ctx->afi->tables, list) {
+		if (cur_table && cur_table != table)
+			continue;
+
+		ctx->table = table;
+		list_for_each_entry(set, &ctx->table->sets, list) {
+			if (idx < s_idx)
+				goto cont;
+			if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
+					       NLM_F_MULTI) < 0) {
+				cb->args[0] = idx;
+				cb->args[2] = (unsigned long) table;
+				goto done;
+			}
+cont:
+			idx++;
+		}
+	}
+	cb->args[1] = 1;
+done:
+	return skb->len;
+}
+
+static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	struct nlattr *nla[NFTA_SET_MAX + 1];
+	struct nft_ctx ctx;
+	int err, ret;
+
+	err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX,
+			  nft_set_policy);
+	if (err < 0)
+		return err;
+
+	err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+	if (err < 0)
+		return err;
+
+	if (ctx.table == NULL)
+		ret = nf_tables_dump_sets_all(&ctx, skb, cb);
+	else
+		ret = nf_tables_dump_sets_table(&ctx, skb, cb);
+
+	return ret;
+}
+
+static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
+			    const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[])
+{
+	const struct nft_set *set;
+	struct nft_ctx ctx;
+	struct sk_buff *skb2;
+	int err;
+
+	/* Verify existance before starting dump */
+	err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+	if (err < 0)
+		return err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_sets,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb2 == NULL)
+		return -ENOMEM;
+
+	err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
+			    const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_set_ops *ops;
+	const struct nft_af_info *afi;
+	struct net *net = sock_net(skb->sk);
+	struct nft_table *table;
+	struct nft_set *set;
+	struct nft_ctx ctx;
+	char name[IFNAMSIZ];
+	unsigned int size;
+	bool create;
+	u32 ktype, klen, dlen, dtype, flags;
+	int err;
+
+	if (nla[NFTA_SET_TABLE] == NULL ||
+	    nla[NFTA_SET_NAME] == NULL ||
+	    nla[NFTA_SET_KEY_LEN] == NULL)
+		return -EINVAL;
+
+	ktype = NFT_DATA_VALUE;
+	if (nla[NFTA_SET_KEY_TYPE] != NULL) {
+		ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+		if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+			return -EINVAL;
+	}
+
+	klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+	if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	flags = 0;
+	if (nla[NFTA_SET_FLAGS] != NULL) {
+		flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
+		if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
+			      NFT_SET_INTERVAL | NFT_SET_MAP))
+			return -EINVAL;
+	}
+
+	dtype = 0;
+	dlen  = 0;
+	if (nla[NFTA_SET_DATA_TYPE] != NULL) {
+		if (!(flags & NFT_SET_MAP))
+			return -EINVAL;
+
+		dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+		if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+		    dtype != NFT_DATA_VERDICT)
+			return -EINVAL;
+
+		if (dtype != NFT_DATA_VERDICT) {
+			if (nla[NFTA_SET_DATA_LEN] == NULL)
+				return -EINVAL;
+			dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+			if (dlen == 0 ||
+			    dlen > FIELD_SIZEOF(struct nft_data, data))
+				return -EINVAL;
+		} else
+			dlen = sizeof(struct nft_data);
+	} else if (flags & NFT_SET_MAP)
+		return -EINVAL;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+
+	set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
+	if (IS_ERR(set)) {
+		if (PTR_ERR(set) != -ENOENT)
+			return PTR_ERR(set);
+		set = NULL;
+	}
+
+	if (set != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+		return 0;
+	}
+
+	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+		return -ENOENT;
+
+	ops = nft_select_set_ops(nla);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+
+	size = 0;
+	if (ops->privsize != NULL)
+		size = ops->privsize(nla);
+
+	err = -ENOMEM;
+	set = kzalloc(sizeof(*set) + size, GFP_KERNEL);
+	if (set == NULL)
+		goto err1;
+
+	nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+	err = nf_tables_set_alloc_name(&ctx, set, name);
+	if (err < 0)
+		goto err2;
+
+	INIT_LIST_HEAD(&set->bindings);
+	set->ops   = ops;
+	set->ktype = ktype;
+	set->klen  = klen;
+	set->dtype = dtype;
+	set->dlen  = dlen;
+	set->flags = flags;
+
+	err = ops->init(set, nla);
+	if (err < 0)
+		goto err2;
+
+	list_add_tail(&set->list, &table->sets);
+	nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
+	return 0;
+
+err2:
+	kfree(set);
+err1:
+	module_put(ops->owner);
+	return err;
+}
+
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+	list_del(&set->list);
+	if (!(set->flags & NFT_SET_ANONYMOUS))
+		nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
+
+	set->ops->destroy(set);
+	module_put(set->ops->owner);
+	kfree(set);
+}
+
+static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
+			    const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[])
+{
+	struct nft_set *set;
+	struct nft_ctx ctx;
+	int err;
+
+	if (nla[NFTA_SET_TABLE] == NULL)
+		return -EINVAL;
+
+	err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+	if (err < 0)
+		return err;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+	if (!list_empty(&set->bindings))
+		return -EBUSY;
+
+	nf_tables_set_destroy(&ctx, set);
+	return 0;
+}
+
+static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
+					const struct nft_set *set,
+					const struct nft_set_iter *iter,
+					const struct nft_set_elem *elem)
+{
+	enum nft_registers dreg;
+
+	dreg = nft_type_to_reg(set->dtype);
+	return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype);
+}
+
+int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
+		       struct nft_set_binding *binding)
+{
+	struct nft_set_binding *i;
+	struct nft_set_iter iter;
+
+	if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+		return -EBUSY;
+
+	if (set->flags & NFT_SET_MAP) {
+		/* If the set is already bound to the same chain all
+		 * jumps are already validated for that chain.
+		 */
+		list_for_each_entry(i, &set->bindings, list) {
+			if (i->chain == binding->chain)
+				goto bind;
+		}
+
+		iter.skip 	= 0;
+		iter.count	= 0;
+		iter.err	= 0;
+		iter.fn		= nf_tables_bind_check_setelem;
+
+		set->ops->walk(ctx, set, &iter);
+		if (iter.err < 0) {
+			/* Destroy anonymous sets if binding fails */
+			if (set->flags & NFT_SET_ANONYMOUS)
+				nf_tables_set_destroy(ctx, set);
+
+			return iter.err;
+		}
+	}
+bind:
+	binding->chain = ctx->chain;
+	list_add_tail(&binding->list, &set->bindings);
+	return 0;
+}
+
+void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+			  struct nft_set_binding *binding)
+{
+	list_del(&binding->list);
+
+	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+		nf_tables_set_destroy(ctx, set);
+}
+
+/*
+ * Set elements
+ */
+
+static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
+	[NFTA_SET_ELEM_KEY]		= { .type = NLA_NESTED },
+	[NFTA_SET_ELEM_DATA]		= { .type = NLA_NESTED },
+	[NFTA_SET_ELEM_FLAGS]		= { .type = NLA_U32 },
+};
+
+static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
+	[NFTA_SET_ELEM_LIST_TABLE]	= { .type = NLA_STRING },
+	[NFTA_SET_ELEM_LIST_SET]	= { .type = NLA_STRING },
+	[NFTA_SET_ELEM_LIST_ELEMENTS]	= { .type = NLA_NESTED },
+};
+
+static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
+				      const struct sk_buff *skb,
+				      const struct nlmsghdr *nlh,
+				      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	struct net *net = sock_net(skb->sk);
+
+	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+	return 0;
+}
+
+static int nf_tables_fill_setelem(struct sk_buff *skb,
+				  const struct nft_set *set,
+				  const struct nft_set_elem *elem)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE,
+			  set->klen) < 0)
+		goto nla_put_failure;
+
+	if (set->flags & NFT_SET_MAP &&
+	    !(elem->flags & NFT_SET_ELEM_INTERVAL_END) &&
+	    nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data,
+			  set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
+			  set->dlen) < 0)
+		goto nla_put_failure;
+
+	if (elem->flags != 0)
+		if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags)))
+			goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -EMSGSIZE;
+}
+
+struct nft_set_dump_args {
+	const struct netlink_callback	*cb;
+	struct nft_set_iter		iter;
+	struct sk_buff			*skb;
+};
+
+static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
+				  const struct nft_set *set,
+				  const struct nft_set_iter *iter,
+				  const struct nft_set_elem *elem)
+{
+	struct nft_set_dump_args *args;
+
+	args = container_of(iter, struct nft_set_dump_args, iter);
+	return nf_tables_fill_setelem(args->skb, set, elem);
+}
+
+static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct nft_set *set;
+	struct nft_set_dump_args args;
+	struct nft_ctx ctx;
+	struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
+	struct nfgenmsg *nfmsg;
+	struct nlmsghdr *nlh;
+	struct nlattr *nest;
+	u32 portid, seq;
+	int event, err;
+
+	nfmsg = nlmsg_data(cb->nlh);
+	err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX,
+			  nft_set_elem_list_policy);
+	if (err < 0)
+		return err;
+
+	err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+	if (err < 0)
+		return err;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	event  = NFT_MSG_NEWSETELEM;
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	portid = NETLINK_CB(cb->skb).portid;
+	seq    = cb->nlh->nlmsg_seq;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+			NLM_F_MULTI);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family = NFPROTO_UNSPEC;
+	nfmsg->version      = NFNETLINK_V0;
+	nfmsg->res_id       = 0;
+
+	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
+		goto nla_put_failure;
+
+	nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	args.cb		= cb;
+	args.skb	= skb;
+	args.iter.skip	= cb->args[0];
+	args.iter.count	= 0;
+	args.iter.err   = 0;
+	args.iter.fn	= nf_tables_dump_setelem;
+	set->ops->walk(&ctx, set, &args.iter);
+
+	nla_nest_end(skb, nest);
+	nlmsg_end(skb, nlh);
+
+	if (args.iter.err && args.iter.err != -EMSGSIZE)
+		return args.iter.err;
+	if (args.iter.count == cb->args[0])
+		return 0;
+
+	cb->args[0] = args.iter.count;
+	return skb->len;
+
+nla_put_failure:
+	return -ENOSPC;
+}
+
+static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
+				const struct nlmsghdr *nlh,
+				const struct nlattr * const nla[])
+{
+	const struct nft_set *set;
+	struct nft_ctx ctx;
+	int err;
+
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	if (err < 0)
+		return err;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_set,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+	return -EOPNOTSUPP;
+}
+
+static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
+			    const struct nlattr *attr)
+{
+	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+	struct nft_data_desc d1, d2;
+	struct nft_set_elem elem;
+	struct nft_set_binding *binding;
+	enum nft_registers dreg;
+	int err;
+
+	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
+			       nft_set_elem_policy);
+	if (err < 0)
+		return err;
+
+	if (nla[NFTA_SET_ELEM_KEY] == NULL)
+		return -EINVAL;
+
+	elem.flags = 0;
+	if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
+		elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
+		if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END)
+			return -EINVAL;
+	}
+
+	if (set->flags & NFT_SET_MAP) {
+		if (nla[NFTA_SET_ELEM_DATA] == NULL &&
+		    !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
+			return -EINVAL;
+	} else {
+		if (nla[NFTA_SET_ELEM_DATA] != NULL)
+			return -EINVAL;
+	}
+
+	err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
+	if (err < 0)
+		goto err1;
+	err = -EINVAL;
+	if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
+		goto err2;
+
+	err = -EEXIST;
+	if (set->ops->get(set, &elem) == 0)
+		goto err2;
+
+	if (nla[NFTA_SET_ELEM_DATA] != NULL) {
+		err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]);
+		if (err < 0)
+			goto err2;
+
+		err = -EINVAL;
+		if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen)
+			goto err3;
+
+		dreg = nft_type_to_reg(set->dtype);
+		list_for_each_entry(binding, &set->bindings, list) {
+			struct nft_ctx bind_ctx = {
+				.afi	= ctx->afi,
+				.table	= ctx->table,
+				.chain	= binding->chain,
+			};
+
+			err = nft_validate_data_load(&bind_ctx, dreg,
+						     &elem.data, d2.type);
+			if (err < 0)
+				goto err3;
+		}
+	}
+
+	err = set->ops->insert(set, &elem);
+	if (err < 0)
+		goto err3;
+
+	return 0;
+
+err3:
+	if (nla[NFTA_SET_ELEM_DATA] != NULL)
+		nft_data_uninit(&elem.data, d2.type);
+err2:
+	nft_data_uninit(&elem.key, d1.type);
+err1:
+	return err;
+}
+
+static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
+				const struct nlmsghdr *nlh,
+				const struct nlattr * const nla[])
+{
+	const struct nlattr *attr;
+	struct nft_set *set;
+	struct nft_ctx ctx;
+	int rem, err;
+
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	if (err < 0)
+		return err;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+		return -EBUSY;
+
+	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+		err = nft_add_set_elem(&ctx, set, attr);
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
+			   const struct nlattr *attr)
+{
+	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+	struct nft_data_desc desc;
+	struct nft_set_elem elem;
+	int err;
+
+	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
+			       nft_set_elem_policy);
+	if (err < 0)
+		goto err1;
+
+	err = -EINVAL;
+	if (nla[NFTA_SET_ELEM_KEY] == NULL)
+		goto err1;
+
+	err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]);
+	if (err < 0)
+		goto err1;
+
+	err = -EINVAL;
+	if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
+		goto err2;
+
+	err = set->ops->get(set, &elem);
+	if (err < 0)
+		goto err2;
+
+	set->ops->remove(set, &elem);
+
+	nft_data_uninit(&elem.key, NFT_DATA_VALUE);
+	if (set->flags & NFT_SET_MAP)
+		nft_data_uninit(&elem.data, set->dtype);
+
+err2:
+	nft_data_uninit(&elem.key, desc.type);
+err1:
+	return err;
+}
+
+static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
+				const struct nlmsghdr *nlh,
+				const struct nlattr * const nla[])
+{
+	const struct nlattr *attr;
+	struct nft_set *set;
+	struct nft_ctx ctx;
+	int rem, err;
+
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	if (err < 0)
+		return err;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+		return -EBUSY;
+
+	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+		err = nft_del_setelem(&ctx, set, attr);
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
+	[NFT_MSG_NEWTABLE] = {
+		.call		= nf_tables_newtable,
+		.attr_count	= NFTA_TABLE_MAX,
+		.policy		= nft_table_policy,
+	},
+	[NFT_MSG_GETTABLE] = {
+		.call		= nf_tables_gettable,
+		.attr_count	= NFTA_TABLE_MAX,
+		.policy		= nft_table_policy,
+	},
+	[NFT_MSG_DELTABLE] = {
+		.call		= nf_tables_deltable,
+		.attr_count	= NFTA_TABLE_MAX,
+		.policy		= nft_table_policy,
+	},
+	[NFT_MSG_NEWCHAIN] = {
+		.call		= nf_tables_newchain,
+		.attr_count	= NFTA_CHAIN_MAX,
+		.policy		= nft_chain_policy,
+	},
+	[NFT_MSG_GETCHAIN] = {
+		.call		= nf_tables_getchain,
+		.attr_count	= NFTA_CHAIN_MAX,
+		.policy		= nft_chain_policy,
+	},
+	[NFT_MSG_DELCHAIN] = {
+		.call		= nf_tables_delchain,
+		.attr_count	= NFTA_CHAIN_MAX,
+		.policy		= nft_chain_policy,
+	},
+	[NFT_MSG_NEWRULE] = {
+		.call_batch	= nf_tables_newrule,
+		.attr_count	= NFTA_RULE_MAX,
+		.policy		= nft_rule_policy,
+	},
+	[NFT_MSG_GETRULE] = {
+		.call		= nf_tables_getrule,
+		.attr_count	= NFTA_RULE_MAX,
+		.policy		= nft_rule_policy,
+	},
+	[NFT_MSG_DELRULE] = {
+		.call_batch	= nf_tables_delrule,
+		.attr_count	= NFTA_RULE_MAX,
+		.policy		= nft_rule_policy,
+	},
+	[NFT_MSG_NEWSET] = {
+		.call		= nf_tables_newset,
+		.attr_count	= NFTA_SET_MAX,
+		.policy		= nft_set_policy,
+	},
+	[NFT_MSG_GETSET] = {
+		.call		= nf_tables_getset,
+		.attr_count	= NFTA_SET_MAX,
+		.policy		= nft_set_policy,
+	},
+	[NFT_MSG_DELSET] = {
+		.call		= nf_tables_delset,
+		.attr_count	= NFTA_SET_MAX,
+		.policy		= nft_set_policy,
+	},
+	[NFT_MSG_NEWSETELEM] = {
+		.call		= nf_tables_newsetelem,
+		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
+		.policy		= nft_set_elem_list_policy,
+	},
+	[NFT_MSG_GETSETELEM] = {
+		.call		= nf_tables_getsetelem,
+		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
+		.policy		= nft_set_elem_list_policy,
+	},
+	[NFT_MSG_DELSETELEM] = {
+		.call		= nf_tables_delsetelem,
+		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
+		.policy		= nft_set_elem_list_policy,
+	},
+};
+
+static const struct nfnetlink_subsystem nf_tables_subsys = {
+	.name		= "nf_tables",
+	.subsys_id	= NFNL_SUBSYS_NFTABLES,
+	.cb_count	= NFT_MSG_MAX,
+	.cb		= nf_tables_cb,
+	.commit		= nf_tables_commit,
+	.abort		= nf_tables_abort,
+};
+
+/*
+ * Loop detection - walk through the ruleset beginning at the destination chain
+ * of a new jump until either the source chain is reached (loop) or all
+ * reachable chains have been traversed.
+ *
+ * The loop check is performed whenever a new jump verdict is added to an
+ * expression or verdict map or a verdict map is bound to a new chain.
+ */
+
+static int nf_tables_check_loops(const struct nft_ctx *ctx,
+				 const struct nft_chain *chain);
+
+static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
+					const struct nft_set *set,
+					const struct nft_set_iter *iter,
+					const struct nft_set_elem *elem)
+{
+	switch (elem->data.verdict) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		return nf_tables_check_loops(ctx, elem->data.chain);
+	default:
+		return 0;
+	}
+}
+
+static int nf_tables_check_loops(const struct nft_ctx *ctx,
+				 const struct nft_chain *chain)
+{
+	const struct nft_rule *rule;
+	const struct nft_expr *expr, *last;
+	const struct nft_set *set;
+	struct nft_set_binding *binding;
+	struct nft_set_iter iter;
+
+	if (ctx->chain == chain)
+		return -ELOOP;
+
+	list_for_each_entry(rule, &chain->rules, list) {
+		nft_rule_for_each_expr(expr, last, rule) {
+			const struct nft_data *data = NULL;
+			int err;
+
+			if (!expr->ops->validate)
+				continue;
+
+			err = expr->ops->validate(ctx, expr, &data);
+			if (err < 0)
+				return err;
+
+			if (data == NULL)
+				continue;
+
+			switch (data->verdict) {
+			case NFT_JUMP:
+			case NFT_GOTO:
+				err = nf_tables_check_loops(ctx, data->chain);
+				if (err < 0)
+					return err;
+			default:
+				break;
+			}
+		}
+	}
+
+	list_for_each_entry(set, &ctx->table->sets, list) {
+		if (!(set->flags & NFT_SET_MAP) ||
+		    set->dtype != NFT_DATA_VERDICT)
+			continue;
+
+		list_for_each_entry(binding, &set->bindings, list) {
+			if (binding->chain != chain)
+				continue;
+
+			iter.skip 	= 0;
+			iter.count	= 0;
+			iter.err	= 0;
+			iter.fn		= nf_tables_loop_check_setelem;
+
+			set->ops->walk(ctx, set, &iter);
+			if (iter.err < 0)
+				return iter.err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	nft_validate_input_register - validate an expressions' input register
+ *
+ *	@reg: the register number
+ *
+ * 	Validate that the input register is one of the general purpose
+ * 	registers.
+ */
+int nft_validate_input_register(enum nft_registers reg)
+{
+	if (reg <= NFT_REG_VERDICT)
+		return -EINVAL;
+	if (reg > NFT_REG_MAX)
+		return -ERANGE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_input_register);
+
+/**
+ *	nft_validate_output_register - validate an expressions' output register
+ *
+ *	@reg: the register number
+ *
+ * 	Validate that the output register is one of the general purpose
+ * 	registers or the verdict register.
+ */
+int nft_validate_output_register(enum nft_registers reg)
+{
+	if (reg < NFT_REG_VERDICT)
+		return -EINVAL;
+	if (reg > NFT_REG_MAX)
+		return -ERANGE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_output_register);
+
+/**
+ *	nft_validate_data_load - validate an expressions' data load
+ *
+ *	@ctx: context of the expression performing the load
+ * 	@reg: the destination register number
+ * 	@data: the data to load
+ * 	@type: the data type
+ *
+ * 	Validate that a data load uses the appropriate data type for
+ * 	the destination register. A value of NULL for the data means
+ * 	that its runtime gathered data, which is always of type
+ * 	NFT_DATA_VALUE.
+ */
+int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
+			   const struct nft_data *data,
+			   enum nft_data_types type)
+{
+	int err;
+
+	switch (reg) {
+	case NFT_REG_VERDICT:
+		if (data == NULL || type != NFT_DATA_VERDICT)
+			return -EINVAL;
+
+		if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) {
+			err = nf_tables_check_loops(ctx, data->chain);
+			if (err < 0)
+				return err;
+
+			if (ctx->chain->level + 1 > data->chain->level) {
+				if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
+					return -EMLINK;
+				data->chain->level = ctx->chain->level + 1;
+			}
+		}
+
+		return 0;
+	default:
+		if (data != NULL && type != NFT_DATA_VALUE)
+			return -EINVAL;
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(nft_validate_data_load);
+
+static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
+	[NFTA_VERDICT_CODE]	= { .type = NLA_U32 },
+	[NFTA_VERDICT_CHAIN]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+};
+
+static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+			    struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	struct nlattr *tb[NFTA_VERDICT_MAX + 1];
+	struct nft_chain *chain;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFTA_VERDICT_CODE])
+		return -EINVAL;
+	data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+	switch (data->verdict) {
+	case NF_ACCEPT:
+	case NF_DROP:
+	case NF_QUEUE:
+	case NFT_CONTINUE:
+	case NFT_BREAK:
+	case NFT_RETURN:
+		desc->len = sizeof(data->verdict);
+		break;
+	case NFT_JUMP:
+	case NFT_GOTO:
+		if (!tb[NFTA_VERDICT_CHAIN])
+			return -EINVAL;
+		chain = nf_tables_chain_lookup(ctx->table,
+					       tb[NFTA_VERDICT_CHAIN]);
+		if (IS_ERR(chain))
+			return PTR_ERR(chain);
+		if (chain->flags & NFT_BASE_CHAIN)
+			return -EOPNOTSUPP;
+
+		chain->use++;
+		data->chain = chain;
+		desc->len = sizeof(data);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	desc->type = NFT_DATA_VERDICT;
+	return 0;
+}
+
+static void nft_verdict_uninit(const struct nft_data *data)
+{
+	switch (data->verdict) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		data->chain->use--;
+		break;
+	}
+}
+
+static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
+		goto nla_put_failure;
+
+	switch (data->verdict) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
+			goto nla_put_failure;
+	}
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
+			  struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	unsigned int len;
+
+	len = nla_len(nla);
+	if (len == 0)
+		return -EINVAL;
+	if (len > sizeof(data->data))
+		return -EOVERFLOW;
+
+	nla_memcpy(data->data, nla, sizeof(data->data));
+	desc->type = NFT_DATA_VALUE;
+	desc->len  = len;
+	return 0;
+}
+
+static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
+			  unsigned int len)
+{
+	return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
+}
+
+static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+	[NFTA_DATA_VALUE]	= { .type = NLA_BINARY,
+				    .len  = FIELD_SIZEOF(struct nft_data, data) },
+	[NFTA_DATA_VERDICT]	= { .type = NLA_NESTED },
+};
+
+/**
+ *	nft_data_init - parse nf_tables data netlink attributes
+ *
+ *	@ctx: context of the expression using the data
+ *	@data: destination struct nft_data
+ *	@desc: data description
+ *	@nla: netlink attribute containing data
+ *
+ *	Parse the netlink data attributes and initialize a struct nft_data.
+ *	The type and length of data are returned in the data description.
+ *
+ *	The caller can indicate that it only wants to accept data of type
+ *	NFT_DATA_VALUE by passing NULL for the ctx argument.
+ */
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+		  struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	struct nlattr *tb[NFTA_DATA_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_DATA_VALUE])
+		return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+	if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
+		return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(nft_data_init);
+
+/**
+ *	nft_data_uninit - release a nft_data item
+ *
+ *	@data: struct nft_data to release
+ *	@type: type of data
+ *
+ *	Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ *	all others need to be released by calling this function.
+ */
+void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+{
+	switch (type) {
+	case NFT_DATA_VALUE:
+		return;
+	case NFT_DATA_VERDICT:
+		return nft_verdict_uninit(data);
+	default:
+		WARN_ON(1);
+	}
+}
+EXPORT_SYMBOL_GPL(nft_data_uninit);
+
+int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
+		  enum nft_data_types type, unsigned int len)
+{
+	struct nlattr *nest;
+	int err;
+
+	nest = nla_nest_start(skb, attr);
+	if (nest == NULL)
+		return -1;
+
+	switch (type) {
+	case NFT_DATA_VALUE:
+		err = nft_value_dump(skb, data, len);
+		break;
+	case NFT_DATA_VERDICT:
+		err = nft_verdict_dump(skb, data);
+		break;
+	default:
+		err = -EINVAL;
+		WARN_ON(1);
+	}
+
+	nla_nest_end(skb, nest);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nft_data_dump);
+
+static int nf_tables_init_net(struct net *net)
+{
+	INIT_LIST_HEAD(&net->nft.af_info);
+	INIT_LIST_HEAD(&net->nft.commit_list);
+	return 0;
+}
+
+static struct pernet_operations nf_tables_net_ops = {
+	.init	= nf_tables_init_net,
+};
+
+static int __init nf_tables_module_init(void)
+{
+	int err;
+
+	info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
+		       GFP_KERNEL);
+	if (info == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	err = nf_tables_core_module_init();
+	if (err < 0)
+		goto err2;
+
+	err = nfnetlink_subsys_register(&nf_tables_subsys);
+	if (err < 0)
+		goto err3;
+
+	pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
+	return register_pernet_subsys(&nf_tables_net_ops);
+err3:
+	nf_tables_core_module_exit();
+err2:
+	kfree(info);
+err1:
+	return err;
+}
+
+static void __exit nf_tables_module_exit(void)
+{
+	unregister_pernet_subsys(&nf_tables_net_ops);
+	nfnetlink_subsys_unregister(&nf_tables_subsys);
+	nf_tables_core_module_exit();
+	kfree(info);
+}
+
+module_init(nf_tables_module_init);
+module_exit(nf_tables_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
new file mode 100644
index 0000000..cb9e685
--- /dev/null
+++ b/net/netfilter/nf_tables_core.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+
+static void nft_cmp_fast_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1])
+{
+	const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+	u32 mask;
+
+	mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len);
+	if ((data[priv->sreg].data[0] & mask) == priv->data)
+		return;
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static bool nft_payload_fast_eval(const struct nft_expr *expr,
+				  struct nft_data data[NFT_REG_MAX + 1],
+				  const struct nft_pktinfo *pkt)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	struct nft_data *dest = &data[priv->dreg];
+	unsigned char *ptr;
+
+	if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
+		ptr = skb_network_header(skb);
+	else
+		ptr = skb_network_header(skb) + pkt->xt.thoff;
+
+	ptr += priv->offset;
+
+	if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
+		return false;
+
+	if (priv->len == 2)
+		*(u16 *)dest->data = *(u16 *)ptr;
+	else if (priv->len == 4)
+		*(u32 *)dest->data = *(u32 *)ptr;
+	else
+		*(u8 *)dest->data = *(u8 *)ptr;
+	return true;
+}
+
+struct nft_jumpstack {
+	const struct nft_chain	*chain;
+	const struct nft_rule	*rule;
+	int			rulenum;
+};
+
+static inline void
+nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt,
+		struct nft_jumpstack *jumpstack, unsigned int stackptr)
+{
+	struct nft_stats __percpu *stats;
+	const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this;
+
+	rcu_read_lock_bh();
+	stats = rcu_dereference(nft_base_chain(chain)->stats);
+	__this_cpu_inc(stats->pkts);
+	__this_cpu_add(stats->bytes, pkt->skb->len);
+	rcu_read_unlock_bh();
+}
+
+enum nft_trace {
+	NFT_TRACE_RULE,
+	NFT_TRACE_RETURN,
+	NFT_TRACE_POLICY,
+};
+
+static const char *const comments[] = {
+	[NFT_TRACE_RULE]	= "rule",
+	[NFT_TRACE_RETURN]	= "return",
+	[NFT_TRACE_POLICY]	= "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+	.type = NF_LOG_TYPE_LOG,
+	.u = {
+		.log = {
+			.level = 4,
+			.logflags = NF_LOG_MASK,
+	        },
+	},
+};
+
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+				    const struct nft_chain *chain,
+				    int rulenum, enum nft_trace type)
+{
+	struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+	nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in,
+		      pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+		      chain->table->name, chain->name, comments[type],
+		      rulenum);
+}
+
+unsigned int
+nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
+{
+	const struct nft_chain *chain = ops->priv;
+	const struct nft_rule *rule;
+	const struct nft_expr *expr, *last;
+	struct nft_data data[NFT_REG_MAX + 1];
+	unsigned int stackptr = 0;
+	struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
+	int rulenum = 0;
+	/*
+	 * Cache cursor to avoid problems in case that the cursor is updated
+	 * while traversing the ruleset.
+	 */
+	unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
+
+do_chain:
+	rule = list_entry(&chain->rules, struct nft_rule, list);
+next_rule:
+	data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+	list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
+
+		/* This rule is not active, skip. */
+		if (unlikely(rule->genmask & (1 << gencursor)))
+			continue;
+
+		rulenum++;
+
+		nft_rule_for_each_expr(expr, last, rule) {
+			if (expr->ops == &nft_cmp_fast_ops)
+				nft_cmp_fast_eval(expr, data);
+			else if (expr->ops != &nft_payload_fast_ops ||
+				 !nft_payload_fast_eval(expr, data, pkt))
+				expr->ops->eval(expr, data, pkt);
+
+			if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
+				break;
+		}
+
+		switch (data[NFT_REG_VERDICT].verdict) {
+		case NFT_BREAK:
+			data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+			/* fall through */
+		case NFT_CONTINUE:
+			continue;
+		}
+		break;
+	}
+
+	switch (data[NFT_REG_VERDICT].verdict) {
+	case NF_ACCEPT:
+	case NF_DROP:
+	case NF_QUEUE:
+		if (unlikely(pkt->skb->nf_trace))
+			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+
+		return data[NFT_REG_VERDICT].verdict;
+	case NFT_JUMP:
+		if (unlikely(pkt->skb->nf_trace))
+			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+
+		BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
+		jumpstack[stackptr].chain = chain;
+		jumpstack[stackptr].rule  = rule;
+		jumpstack[stackptr].rulenum = rulenum;
+		stackptr++;
+		/* fall through */
+	case NFT_GOTO:
+		chain = data[NFT_REG_VERDICT].chain;
+		goto do_chain;
+	case NFT_RETURN:
+		if (unlikely(pkt->skb->nf_trace))
+			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
+
+		/* fall through */
+	case NFT_CONTINUE:
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	if (stackptr > 0) {
+		if (unlikely(pkt->skb->nf_trace))
+			nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
+
+		stackptr--;
+		chain = jumpstack[stackptr].chain;
+		rule  = jumpstack[stackptr].rule;
+		rulenum = jumpstack[stackptr].rulenum;
+		goto next_rule;
+	}
+	nft_chain_stats(chain, pkt, jumpstack, stackptr);
+
+	if (unlikely(pkt->skb->nf_trace))
+		nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY);
+
+	return nft_base_chain(chain)->policy;
+}
+EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo);
+
+int __init nf_tables_core_module_init(void)
+{
+	int err;
+
+	err = nft_immediate_module_init();
+	if (err < 0)
+		goto err1;
+
+	err = nft_cmp_module_init();
+	if (err < 0)
+		goto err2;
+
+	err = nft_lookup_module_init();
+	if (err < 0)
+		goto err3;
+
+	err = nft_bitwise_module_init();
+	if (err < 0)
+		goto err4;
+
+	err = nft_byteorder_module_init();
+	if (err < 0)
+		goto err5;
+
+	err = nft_payload_module_init();
+	if (err < 0)
+		goto err6;
+
+	return 0;
+
+err6:
+	nft_byteorder_module_exit();
+err5:
+	nft_bitwise_module_exit();
+err4:
+	nft_lookup_module_exit();
+err3:
+	nft_cmp_module_exit();
+err2:
+	nft_immediate_module_exit();
+err1:
+	return err;
+}
+
+void nf_tables_core_module_exit(void)
+{
+	nft_payload_module_exit();
+	nft_byteorder_module_exit();
+	nft_bitwise_module_exit();
+	nft_lookup_module_exit();
+	nft_cmp_module_exit();
+	nft_immediate_module_exit();
+}
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
deleted file mode 100644
index 474d621..0000000
--- a/net/netfilter/nf_tproxy_core.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Transparent proxy support for Linux/iptables
- *
- * Copyright (c) 2006-2007 BalaBit IT Ltd.
- * Author: Balazs Scheidler, Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/net.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <net/udp.h>
-#include <net/netfilter/nf_tproxy_core.h>
-
-
-static void
-nf_tproxy_destructor(struct sk_buff *skb)
-{
-	struct sock *sk = skb->sk;
-
-	skb->sk = NULL;
-	skb->destructor = NULL;
-
-	if (sk)
-		sock_put(sk);
-}
-
-/* consumes sk */
-void
-nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
-{
-	/* assigning tw sockets complicates things; most
-	 * skb->sk->X checks would have to test sk->sk_state first */
-	if (sk->sk_state == TCP_TIME_WAIT) {
-		inet_twsk_put(inet_twsk(sk));
-		return;
-	}
-
-	skb_orphan(skb);
-	skb->sk = sk;
-	skb->destructor = nf_tproxy_destructor;
-}
-EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
-
-static int __init nf_tproxy_init(void)
-{
-	pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
-	pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
-	return 0;
-}
-
-module_init(nf_tproxy_init);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Krisztian Kovacs");
-MODULE_DESCRIPTION("Transparent proxy support core routines");
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 572d87d..046aa13 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	const struct nfnetlink_subsystem *ss;
 	int type, err;
 
-	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-		return -EPERM;
-
 	/* All the messages must at least contain nfgenmsg */
 	if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
 		return 0;
@@ -217,9 +214,181 @@ replay:
 	}
 }
 
+static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
+				u_int16_t subsys_id)
+{
+	struct sk_buff *nskb, *oskb = skb;
+	struct net *net = sock_net(skb->sk);
+	const struct nfnetlink_subsystem *ss;
+	const struct nfnl_callback *nc;
+	bool success = true, done = false;
+	int err;
+
+	if (subsys_id >= NFNL_SUBSYS_COUNT)
+		return netlink_ack(skb, nlh, -EINVAL);
+replay:
+	nskb = netlink_skb_clone(oskb, GFP_KERNEL);
+	if (!nskb)
+		return netlink_ack(oskb, nlh, -ENOMEM);
+
+	nskb->sk = oskb->sk;
+	skb = nskb;
+
+	nfnl_lock(subsys_id);
+	ss = rcu_dereference_protected(table[subsys_id].subsys,
+				       lockdep_is_held(&table[subsys_id].mutex));
+	if (!ss) {
+#ifdef CONFIG_MODULES
+		nfnl_unlock(subsys_id);
+		request_module("nfnetlink-subsys-%d", subsys_id);
+		nfnl_lock(subsys_id);
+		ss = rcu_dereference_protected(table[subsys_id].subsys,
+					       lockdep_is_held(&table[subsys_id].mutex));
+		if (!ss)
+#endif
+		{
+			nfnl_unlock(subsys_id);
+			kfree_skb(nskb);
+			return netlink_ack(skb, nlh, -EOPNOTSUPP);
+		}
+	}
+
+	if (!ss->commit || !ss->abort) {
+		nfnl_unlock(subsys_id);
+		kfree_skb(nskb);
+		return netlink_ack(skb, nlh, -EOPNOTSUPP);
+	}
+
+	while (skb->len >= nlmsg_total_size(0)) {
+		int msglen, type;
+
+		nlh = nlmsg_hdr(skb);
+		err = 0;
+
+		if (nlh->nlmsg_len < NLMSG_HDRLEN) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		/* Only requests are handled by the kernel */
+		if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		type = nlh->nlmsg_type;
+		if (type == NFNL_MSG_BATCH_BEGIN) {
+			/* Malformed: Batch begin twice */
+			success = false;
+			goto done;
+		} else if (type == NFNL_MSG_BATCH_END) {
+			done = true;
+			goto done;
+		} else if (type < NLMSG_MIN_TYPE) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		/* We only accept a batch with messages for the same
+		 * subsystem.
+		 */
+		if (NFNL_SUBSYS_ID(type) != subsys_id) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		nc = nfnetlink_find_client(type, ss);
+		if (!nc) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		{
+			int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+			u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+			struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+			struct nlattr *attr = (void *)nlh + min_len;
+			int attrlen = nlh->nlmsg_len - min_len;
+
+			err = nla_parse(cda, ss->cb[cb_id].attr_count,
+					attr, attrlen, ss->cb[cb_id].policy);
+			if (err < 0)
+				goto ack;
+
+			if (nc->call_batch) {
+				err = nc->call_batch(net->nfnl, skb, nlh,
+						     (const struct nlattr **)cda);
+			}
+
+			/* The lock was released to autoload some module, we
+			 * have to abort and start from scratch using the
+			 * original skb.
+			 */
+			if (err == -EAGAIN) {
+				ss->abort(skb);
+				nfnl_unlock(subsys_id);
+				kfree_skb(nskb);
+				goto replay;
+			}
+		}
+ack:
+		if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+			/* We don't stop processing the batch on errors, thus,
+			 * userspace gets all the errors that the batch
+			 * triggers.
+			 */
+			netlink_ack(skb, nlh, err);
+			if (err)
+				success = false;
+		}
+
+		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (msglen > skb->len)
+			msglen = skb->len;
+		skb_pull(skb, msglen);
+	}
+done:
+	if (success && done)
+		ss->commit(skb);
+	else
+		ss->abort(skb);
+
+	nfnl_unlock(subsys_id);
+	kfree_skb(nskb);
+}
+
 static void nfnetlink_rcv(struct sk_buff *skb)
 {
-	netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
+	struct net *net = sock_net(skb->sk);
+	int msglen;
+
+	if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+	    skb->len < nlh->nlmsg_len)
+		return;
+
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+		netlink_ack(skb, nlh, -EPERM);
+		return;
+	}
+
+	if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) {
+		struct nfgenmsg *nfgenmsg;
+
+		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (msglen > skb->len)
+			msglen = skb->len;
+
+		if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+		    skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
+			return;
+
+		nfgenmsg = nlmsg_data(nlh);
+		skb_pull(skb, msglen);
+		nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
+	} else {
+		netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+	}
 }
 
 #ifdef CONFIG_MODULES
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index a191b6d..9e287cb 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -67,9 +67,12 @@ static int
 nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
 			  const struct nlattr *attr)
 {
+	int err;
 	struct nlattr *tb[NFCTH_TUPLE_MAX+1];
 
-	nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol);
+	err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol);
+	if (err < 0)
+		return err;
 
 	if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
 		return -EINVAL;
@@ -121,9 +124,12 @@ static int
 nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
 			    const struct nlattr *attr)
 {
+	int err;
 	struct nlattr *tb[NFCTH_POLICY_MAX+1];
 
-	nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol);
+	err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol);
+	if (err < 0)
+		return err;
 
 	if (!tb[NFCTH_POLICY_NAME] ||
 	    !tb[NFCTH_POLICY_EXPECT_MAX] ||
@@ -153,8 +159,10 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
 	struct nf_conntrack_expect_policy *expect_policy;
 	struct nlattr *tb[NFCTH_POLICY_SET_MAX+1];
 
-	nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
-					nfnl_cthelper_expect_policy_set);
+	ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
+			       nfnl_cthelper_expect_policy_set);
+	if (ret < 0)
+		return ret;
 
 	if (!tb[NFCTH_POLICY_SET_NUM])
 		return -EINVAL;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 65074df..476accd 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -49,21 +49,20 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
 };
 
 static int
-ctnl_timeout_parse_policy(struct ctnl_timeout *timeout,
-			  struct nf_conntrack_l4proto *l4proto,
-			  struct net *net,
-			  const struct nlattr *attr)
+ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
+			  struct net *net, const struct nlattr *attr)
 {
 	int ret = 0;
 
 	if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) {
 		struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
 
-		nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
-				 attr, l4proto->ctnl_timeout.nla_policy);
+		ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
+				       attr, l4proto->ctnl_timeout.nla_policy);
+		if (ret < 0)
+			return ret;
 
-		ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net,
-							  &timeout->data);
+		ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
 	}
 	return ret;
 }
@@ -121,7 +120,8 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
 				goto err_proto_put;
 			}
 
-			ret = ctnl_timeout_parse_policy(matching, l4proto, net,
+			ret = ctnl_timeout_parse_policy(&matching->data,
+							l4proto, net,
 							cda[CTA_TIMEOUT_DATA]);
 			return ret;
 		}
@@ -136,7 +136,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
 		goto err_proto_put;
 	}
 
-	ret = ctnl_timeout_parse_policy(timeout, l4proto, net,
+	ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net,
 					cda[CTA_TIMEOUT_DATA]);
 	if (ret < 0)
 		goto err;
@@ -340,6 +340,147 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb,
 	return ret;
 }
 
+static int
+cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb,
+		      const struct nlmsghdr *nlh,
+		      const struct nlattr * const cda[])
+{
+	__u16 l3num;
+	__u8 l4num;
+	struct nf_conntrack_l4proto *l4proto;
+	struct net *net = sock_net(skb->sk);
+	unsigned int *timeouts;
+	int ret;
+
+	if (!cda[CTA_TIMEOUT_L3PROTO] ||
+	    !cda[CTA_TIMEOUT_L4PROTO] ||
+	    !cda[CTA_TIMEOUT_DATA])
+		return -EINVAL;
+
+	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
+	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
+	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+	/* This protocol is not supported, skip. */
+	if (l4proto->l4proto != l4num) {
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
+	timeouts = l4proto->get_timeouts(net);
+
+	ret = ctnl_timeout_parse_policy(timeouts, l4proto, net,
+					cda[CTA_TIMEOUT_DATA]);
+	if (ret < 0)
+		goto err;
+
+	nf_ct_l4proto_put(l4proto);
+	return 0;
+err:
+	nf_ct_l4proto_put(l4proto);
+	return ret;
+}
+
+static int
+cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
+			    u32 seq, u32 type, int event,
+			    struct nf_conntrack_l4proto *l4proto)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	unsigned int flags = portid ? NLM_F_MULTI : 0;
+
+	event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
+	if (nlh == NULL)
+		goto nlmsg_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family = AF_UNSPEC;
+	nfmsg->version = NFNETLINK_V0;
+	nfmsg->res_id = 0;
+
+	if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
+	    nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
+		goto nla_put_failure;
+
+	if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
+		struct nlattr *nest_parms;
+		unsigned int *timeouts = l4proto->get_timeouts(net);
+		int ret;
+
+		nest_parms = nla_nest_start(skb,
+					    CTA_TIMEOUT_DATA | NLA_F_NESTED);
+		if (!nest_parms)
+			goto nla_put_failure;
+
+		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
+		if (ret < 0)
+			goto nla_put_failure;
+
+		nla_nest_end(skb, nest_parms);
+	}
+
+	nlmsg_end(skb, nlh);
+	return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -1;
+}
+
+static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb,
+				 const struct nlmsghdr *nlh,
+				 const struct nlattr * const cda[])
+{
+	__u16 l3num;
+	__u8 l4num;
+	struct nf_conntrack_l4proto *l4proto;
+	struct net *net = sock_net(skb->sk);
+	struct sk_buff *skb2;
+	int ret, err;
+
+	if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
+		return -EINVAL;
+
+	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
+	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
+	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+	/* This protocol is not supported, skip. */
+	if (l4proto->l4proto != l4num) {
+		err = -EOPNOTSUPP;
+		goto err;
+	}
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb2 == NULL) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid,
+					  nlh->nlmsg_seq,
+					  NFNL_MSG_TYPE(nlh->nlmsg_type),
+					  IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
+					  l4proto);
+	if (ret <= 0) {
+		kfree_skb(skb2);
+		err = -ENOMEM;
+		goto err;
+	}
+	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	if (ret > 0)
+		ret = 0;
+
+	/* this avoids a loop in nfnetlink. */
+	return ret == -EAGAIN ? -ENOBUFS : ret;
+err:
+	nf_ct_l4proto_put(l4proto);
+	return err;
+}
+
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 static struct ctnl_timeout *ctnl_timeout_find_get(const char *name)
 {
@@ -382,6 +523,12 @@ static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
 	[IPCTNL_MSG_TIMEOUT_DELETE]	= { .call = cttimeout_del_timeout,
 					    .attr_count = CTA_TIMEOUT_MAX,
 					    .policy = cttimeout_nla_policy },
+	[IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set,
+					    .attr_count = CTA_TIMEOUT_MAX,
+					    .policy = cttimeout_nla_policy },
+	[IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get,
+					    .attr_count = CTA_TIMEOUT_MAX,
+					    .policy = cttimeout_nla_policy },
 };
 
 static const struct nfnetlink_subsystem cttimeout_subsys = {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 962e979..3c4b69e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -319,7 +319,8 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)
 }
 
 static struct sk_buff *
-nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)
+nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
+		 unsigned int pkt_size)
 {
 	struct sk_buff *skb;
 	unsigned int n;
@@ -328,13 +329,13 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)
 	 * message.  WARNING: has to be <= 128k due to slab restrictions */
 
 	n = max(inst_size, pkt_size);
-	skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC);
+	skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC);
 	if (!skb) {
 		if (n > pkt_size) {
 			/* try to allocate only as much as we need for current
 			 * packet */
 
-			skb = nfnetlink_alloc_skb(&init_net, pkt_size,
+			skb = nfnetlink_alloc_skb(net, pkt_size,
 						  peer_portid, GFP_ATOMIC);
 			if (!skb)
 				pr_err("nfnetlink_log: can't even alloc %u bytes\n",
@@ -419,6 +420,7 @@ __build_packet_message(struct nfnl_log_net *log,
 	nfmsg->version = NFNETLINK_V0;
 	nfmsg->res_id = htons(inst->group_num);
 
+	memset(&pmsg, 0, sizeof(pmsg));
 	pmsg.hw_protocol	= skb->protocol;
 	pmsg.hook		= hooknum;
 
@@ -498,7 +500,10 @@ __build_packet_message(struct nfnl_log_net *log,
 	if (indev && skb->dev &&
 	    skb->mac_header != skb->network_header) {
 		struct nfulnl_msg_packet_hw phw;
-		int len = dev_parse_header(skb, phw.hw_addr);
+		int len;
+
+		memset(&phw, 0, sizeof(phw));
+		len = dev_parse_header(skb, phw.hw_addr);
 		if (len > 0) {
 			phw.hw_addrlen = htons(len);
 			if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw))
@@ -698,8 +703,8 @@ nfulnl_log_packet(struct net *net,
 	}
 
 	if (!inst->skb) {
-		inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz,
-					     size);
+		inst->skb = nfulnl_alloc_skb(net, inst->peer_portid,
+					     inst->nlbufsiz, size);
 		if (!inst->skb)
 			goto alloc_failure;
 	}
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 5352b2d..21258cf 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -41,6 +41,14 @@
 
 #define NFQNL_QMAX_DEFAULT 1024
 
+/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
+ * includes the header length. Thus, the maximum packet length that we
+ * support is 65531 bytes. We send truncated packets if the specified length
+ * is larger than that.  Userspace can check for presence of NFQA_CAP_LEN
+ * attribute to detect truncation.
+ */
+#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
+
 struct nfqnl_instance {
 	struct hlist_node hlist;		/* global list of queues */
 	struct rcu_head rcu;
@@ -122,7 +130,7 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
 	inst->queue_num = queue_num;
 	inst->peer_portid = portid;
 	inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
-	inst->copy_range = 0xffff;
+	inst->copy_range = NFQNL_MAX_COPY_RANGE;
 	inst->copy_mode = NFQNL_COPY_NONE;
 	spin_lock_init(&inst->lock);
 	INIT_LIST_HEAD(&inst->queue_list);
@@ -272,12 +280,17 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
 	skb_shinfo(to)->nr_frags = j;
 }
 
-static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet)
+static int
+nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
+		      bool csum_verify)
 {
 	__u32 flags = 0;
 
 	if (packet->ip_summed == CHECKSUM_PARTIAL)
 		flags = NFQA_SKB_CSUMNOTREADY;
+	else if (csum_verify)
+		flags = NFQA_SKB_CSUM_NOTVERIFIED;
+
 	if (skb_is_gso(packet))
 		flags |= NFQA_SKB_GSO;
 
@@ -285,7 +298,7 @@ static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet)
 }
 
 static struct sk_buff *
-nfqnl_build_packet_message(struct nfqnl_instance *queue,
+nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 			   struct nf_queue_entry *entry,
 			   __be32 **packet_id_ptr)
 {
@@ -302,6 +315,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	struct net_device *outdev;
 	struct nf_conn *ct = NULL;
 	enum ip_conntrack_info uninitialized_var(ctinfo);
+	bool csum_verify;
 
 	size =    nlmsg_total_size(sizeof(struct nfgenmsg))
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -319,6 +333,12 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (entskb->tstamp.tv64)
 		size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
 
+	if (entry->hook <= NF_INET_FORWARD ||
+	   (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
+		csum_verify = !skb_csum_unnecessary(entskb);
+	else
+		csum_verify = false;
+
 	outdev = entry->outdev;
 
 	switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
@@ -333,10 +353,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			return NULL;
 
 		data_len = ACCESS_ONCE(queue->copy_range);
-		if (data_len == 0 || data_len > entskb->len)
+		if (data_len > entskb->len)
 			data_len = entskb->len;
 
-
 		if (!entskb->head_frag ||
 		    skb_headlen(entskb) < L1_CACHE_BYTES ||
 		    skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
@@ -353,7 +372,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (queue->flags & NFQA_CFG_F_CONNTRACK)
 		ct = nfqnl_ct_get(entskb, &size, &ctinfo);
 
-	skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid,
+	skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
 				  GFP_ATOMIC);
 	if (!skb)
 		return NULL;
@@ -444,7 +463,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (indev && entskb->dev &&
 	    entskb->mac_header != entskb->network_header) {
 		struct nfqnl_msg_packet_hw phw;
-		int len = dev_parse_header(entskb, phw.hw_addr);
+		int len;
+
+		memset(&phw, 0, sizeof(phw));
+		len = dev_parse_header(entskb, phw.hw_addr);
 		if (len) {
 			phw.hw_addrlen = htons(len);
 			if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw))
@@ -465,10 +487,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
 		goto nla_put_failure;
 
-	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
+	if (cap_len > data_len &&
+	    nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
 		goto nla_put_failure;
 
-	if (nfqnl_put_packet_info(skb, entskb))
+	if (nfqnl_put_packet_info(skb, entskb, csum_verify))
 		goto nla_put_failure;
 
 	if (data_len) {
@@ -502,17 +525,13 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
 	__be32 *packet_id_ptr;
 	int failopen = 0;
 
-	nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
+	nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr);
 	if (nskb == NULL) {
 		err = -ENOMEM;
 		goto err_out;
 	}
 	spin_lock_bh(&queue->lock);
 
-	if (!queue->peer_portid) {
-		err = -EINVAL;
-		goto err_out_free_nskb;
-	}
 	if (queue->queue_total >= queue->queue_maxlen) {
 		if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
 			failopen = 1;
@@ -731,13 +750,8 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
 
 	case NFQNL_COPY_PACKET:
 		queue->copy_mode = mode;
-		/* We're using struct nlattr which has 16bit nla_len. Note that
-		 * nla_len includes the header length. Thus, the maximum packet
-		 * length that we support is 65531 bytes. We send truncated
-		 * packets if the specified length is larger than that.
-		 */
-		if (range > 0xffff - NLA_HDRLEN)
-			queue->copy_range = 0xffff - NLA_HDRLEN;
+		if (range == 0 || range > NFQNL_MAX_COPY_RANGE)
+			queue->copy_range = NFQNL_MAX_COPY_RANGE;
 		else
 			queue->copy_range = range;
 		break;
@@ -800,7 +814,7 @@ static int
 nfqnl_rcv_dev_event(struct notifier_block *this,
 		    unsigned long event, void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
 	/* Drop any packets associated with the downed device */
 	if (event == NETDEV_DOWN)
@@ -848,6 +862,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
 	[NFQA_MARK]		= { .type = NLA_U32 },
 	[NFQA_PAYLOAD]		= { .type = NLA_UNSPEC },
 	[NFQA_CT]		= { .type = NLA_UNSPEC },
+	[NFQA_EXP]		= { .type = NLA_UNSPEC },
 };
 
 static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
@@ -976,9 +991,14 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 	if (entry == NULL)
 		return -ENOENT;
 
-	rcu_read_lock();
-	if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK))
+	if (nfqa[NFQA_CT]) {
 		ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
+		if (ct && nfqa[NFQA_EXP]) {
+			nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
+					    NETLINK_CB(skb).portid,
+					    nlmsg_report(nlh));
+		}
+	}
 
 	if (nfqa[NFQA_PAYLOAD]) {
 		u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
@@ -989,9 +1009,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 			verdict = NF_DROP;
 
 		if (ct)
-			nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff);
+			nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff);
 	}
-	rcu_read_unlock();
 
 	if (nfqa[NFQA_MARK])
 		entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c
index ab61d66..96cac50 100644
--- a/net/netfilter/nfnetlink_queue_ct.c
+++ b/net/netfilter/nfnetlink_queue_ct.c
@@ -87,12 +87,27 @@ nla_put_failure:
 void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo, int diff)
 {
-	struct nfq_ct_nat_hook *nfq_nat_ct;
+	struct nfq_ct_hook *nfq_ct;
 
-	nfq_nat_ct = rcu_dereference(nfq_ct_nat_hook);
-	if (nfq_nat_ct == NULL)
+	nfq_ct = rcu_dereference(nfq_ct_hook);
+	if (nfq_ct == NULL)
 		return;
 
 	if ((ct->status & IPS_NAT_MASK) && diff)
-		nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff);
+		nfq_ct->seq_adjust(skb, ct, ctinfo, diff);
+}
+
+int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
+			u32 portid, u32 report)
+{
+	struct nfq_ct_hook *nfq_ct;
+
+	if (nf_ct_is_untracked(ct))
+		return 0;
+
+	nfq_ct = rcu_dereference(nfq_ct_hook);
+	if (nfq_ct == NULL)
+		return -EOPNOTSUPP;
+
+	return nfq_ct->attach_expect(attr, ct, portid, report);
 }
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
new file mode 100644
index 0000000..4fb6ee2
--- /dev/null
+++ b/net/netfilter/nft_bitwise.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_bitwise {
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	u8			len;
+	struct nft_data		mask;
+	struct nft_data		xor;
+};
+
+static void nft_bitwise_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	const struct nft_bitwise *priv = nft_expr_priv(expr);
+	const struct nft_data *src = &data[priv->sreg];
+	struct nft_data *dst = &data[priv->dreg];
+	unsigned int i;
+
+	for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) {
+		dst->data[i] = (src->data[i] & priv->mask.data[i]) ^
+			       priv->xor.data[i];
+	}
+}
+
+static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
+	[NFTA_BITWISE_SREG]	= { .type = NLA_U32 },
+	[NFTA_BITWISE_DREG]	= { .type = NLA_U32 },
+	[NFTA_BITWISE_LEN]	= { .type = NLA_U32 },
+	[NFTA_BITWISE_MASK]	= { .type = NLA_NESTED },
+	[NFTA_BITWISE_XOR]	= { .type = NLA_NESTED },
+};
+
+static int nft_bitwise_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_bitwise *priv = nft_expr_priv(expr);
+	struct nft_data_desc d1, d2;
+	int err;
+
+	if (tb[NFTA_BITWISE_SREG] == NULL ||
+	    tb[NFTA_BITWISE_DREG] == NULL ||
+	    tb[NFTA_BITWISE_LEN] == NULL ||
+	    tb[NFTA_BITWISE_MASK] == NULL ||
+	    tb[NFTA_BITWISE_XOR] == NULL)
+		return -EINVAL;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+
+	err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]);
+	if (err < 0)
+		return err;
+	if (d1.len != priv->len)
+		return -EINVAL;
+
+	err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]);
+	if (err < 0)
+		return err;
+	if (d2.len != priv->len)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_bitwise *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_bitwise_type;
+static const struct nft_expr_ops nft_bitwise_ops = {
+	.type		= &nft_bitwise_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
+	.eval		= nft_bitwise_eval,
+	.init		= nft_bitwise_init,
+	.dump		= nft_bitwise_dump,
+};
+
+static struct nft_expr_type nft_bitwise_type __read_mostly = {
+	.name		= "bitwise",
+	.ops		= &nft_bitwise_ops,
+	.policy		= nft_bitwise_policy,
+	.maxattr	= NFTA_BITWISE_MAX,
+	.owner		= THIS_MODULE,
+};
+
+int __init nft_bitwise_module_init(void)
+{
+	return nft_register_expr(&nft_bitwise_type);
+}
+
+void nft_bitwise_module_exit(void)
+{
+	nft_unregister_expr(&nft_bitwise_type);
+}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
new file mode 100644
index 0000000..c39ed8d
--- /dev/null
+++ b/net/netfilter/nft_byteorder.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_byteorder {
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	enum nft_byteorder_ops	op:8;
+	u8			len;
+	u8			size;
+};
+
+static void nft_byteorder_eval(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
+{
+	const struct nft_byteorder *priv = nft_expr_priv(expr);
+	struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
+	union { u32 u32; u16 u16; } *s, *d;
+	unsigned int i;
+
+	s = (void *)src->data;
+	d = (void *)dst->data;
+
+	switch (priv->size) {
+	case 4:
+		switch (priv->op) {
+		case NFT_BYTEORDER_NTOH:
+			for (i = 0; i < priv->len / 4; i++)
+				d[i].u32 = ntohl((__force __be32)s[i].u32);
+			break;
+		case NFT_BYTEORDER_HTON:
+			for (i = 0; i < priv->len / 4; i++)
+				d[i].u32 = (__force __u32)htonl(s[i].u32);
+			break;
+		}
+		break;
+	case 2:
+		switch (priv->op) {
+		case NFT_BYTEORDER_NTOH:
+			for (i = 0; i < priv->len / 2; i++)
+				d[i].u16 = ntohs((__force __be16)s[i].u16);
+			break;
+		case NFT_BYTEORDER_HTON:
+			for (i = 0; i < priv->len / 2; i++)
+				d[i].u16 = (__force __u16)htons(s[i].u16);
+			break;
+		}
+		break;
+	}
+}
+
+static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = {
+	[NFTA_BYTEORDER_SREG]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_DREG]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_OP]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_LEN]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_SIZE]	= { .type = NLA_U32 },
+};
+
+static int nft_byteorder_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_byteorder *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_BYTEORDER_SREG] == NULL ||
+	    tb[NFTA_BYTEORDER_DREG] == NULL ||
+	    tb[NFTA_BYTEORDER_LEN] == NULL ||
+	    tb[NFTA_BYTEORDER_SIZE] == NULL ||
+	    tb[NFTA_BYTEORDER_OP] == NULL)
+		return -EINVAL;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
+	switch (priv->op) {
+	case NFT_BYTEORDER_NTOH:
+	case NFT_BYTEORDER_HTON:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+	if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
+	switch (priv->size) {
+	case 2:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_byteorder *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_byteorder_type;
+static const struct nft_expr_ops nft_byteorder_ops = {
+	.type		= &nft_byteorder_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
+	.eval		= nft_byteorder_eval,
+	.init		= nft_byteorder_init,
+	.dump		= nft_byteorder_dump,
+};
+
+static struct nft_expr_type nft_byteorder_type __read_mostly = {
+	.name		= "byteorder",
+	.ops		= &nft_byteorder_ops,
+	.policy		= nft_byteorder_policy,
+	.maxattr	= NFTA_BYTEORDER_MAX,
+	.owner		= THIS_MODULE,
+};
+
+int __init nft_byteorder_module_init(void)
+{
+	return nft_register_expr(&nft_byteorder_type);
+}
+
+void nft_byteorder_module_exit(void)
+{
+	nft_unregister_expr(&nft_byteorder_type);
+}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
new file mode 100644
index 0000000..954925d
--- /dev/null
+++ b/net/netfilter/nft_cmp.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_cmp_expr {
+	struct nft_data		data;
+	enum nft_registers	sreg:8;
+	u8			len;
+	enum nft_cmp_ops	op:8;
+};
+
+static void nft_cmp_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+	int d;
+
+	d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len);
+	switch (priv->op) {
+	case NFT_CMP_EQ:
+		if (d != 0)
+			goto mismatch;
+		break;
+	case NFT_CMP_NEQ:
+		if (d == 0)
+			goto mismatch;
+		break;
+	case NFT_CMP_LT:
+		if (d == 0)
+			goto mismatch;
+	case NFT_CMP_LTE:
+		if (d > 0)
+			goto mismatch;
+		break;
+	case NFT_CMP_GT:
+		if (d == 0)
+			goto mismatch;
+	case NFT_CMP_GTE:
+		if (d < 0)
+			goto mismatch;
+		break;
+	}
+	return;
+
+mismatch:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
+	[NFTA_CMP_SREG]		= { .type = NLA_U32 },
+	[NFTA_CMP_OP]		= { .type = NLA_U32 },
+	[NFTA_CMP_DATA]		= { .type = NLA_NESTED },
+};
+
+static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_cmp_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc;
+	int err;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+	priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+
+	err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+	BUG_ON(err < 0);
+
+	priv->len = desc.len;
+	return 0;
+}
+
+static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op)))
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_cmp_type;
+static const struct nft_expr_ops nft_cmp_ops = {
+	.type		= &nft_cmp_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
+	.eval		= nft_cmp_eval,
+	.init		= nft_cmp_init,
+	.dump		= nft_cmp_dump,
+};
+
+static int nft_cmp_fast_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc;
+	struct nft_data data;
+	u32 mask;
+	int err;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+
+	err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+	BUG_ON(err < 0);
+	desc.len *= BITS_PER_BYTE;
+
+	mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len);
+	priv->data = data.data[0] & mask;
+	priv->len  = desc.len;
+	return 0;
+}
+
+static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+	struct nft_data data;
+
+	if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ)))
+		goto nla_put_failure;
+
+	data.data[0] = priv->data;
+	if (nft_data_dump(skb, NFTA_CMP_DATA, &data,
+			  NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0)
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+const struct nft_expr_ops nft_cmp_fast_ops = {
+	.type		= &nft_cmp_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)),
+	.eval		= NULL,	/* inlined */
+	.init		= nft_cmp_fast_init,
+	.dump		= nft_cmp_fast_dump,
+};
+
+static const struct nft_expr_ops *
+nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+	struct nft_data_desc desc;
+	struct nft_data data;
+	enum nft_registers sreg;
+	enum nft_cmp_ops op;
+	int err;
+
+	if (tb[NFTA_CMP_SREG] == NULL ||
+	    tb[NFTA_CMP_OP] == NULL ||
+	    tb[NFTA_CMP_DATA] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+	err = nft_validate_input_register(sreg);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+	switch (op) {
+	case NFT_CMP_EQ:
+	case NFT_CMP_NEQ:
+	case NFT_CMP_LT:
+	case NFT_CMP_LTE:
+	case NFT_CMP_GT:
+	case NFT_CMP_GTE:
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ)
+		return &nft_cmp_fast_ops;
+	else
+		return &nft_cmp_ops;
+}
+
+static struct nft_expr_type nft_cmp_type __read_mostly = {
+	.name		= "cmp",
+	.select_ops	= nft_cmp_select_ops,
+	.policy		= nft_cmp_policy,
+	.maxattr	= NFTA_CMP_MAX,
+	.owner		= THIS_MODULE,
+};
+
+int __init nft_cmp_module_init(void)
+{
+	return nft_register_expr(&nft_cmp_type);
+}
+
+void nft_cmp_module_exit(void)
+{
+	nft_unregister_expr(&nft_cmp_type);
+}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
new file mode 100644
index 0000000..da0c1f4
--- /dev/null
+++ b/net/netfilter/nft_compat.c
@@ -0,0 +1,775 @@
+/*
+ * (C) 2012-2013 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This software has been sponsored by Sophos Astaro <http://www.sophos.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nf_tables_compat.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <asm/uaccess.h> /* for set_fs */
+#include <net/netfilter/nf_tables.h>
+
+union nft_entry {
+	struct ipt_entry e4;
+	struct ip6t_entry e6;
+};
+
+static inline void
+nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
+{
+	par->target	= xt;
+	par->targinfo	= xt_info;
+	par->hotdrop	= false;
+}
+
+static void nft_target_eval(const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1],
+			    const struct nft_pktinfo *pkt)
+{
+	void *info = nft_expr_priv(expr);
+	struct xt_target *target = expr->ops->data;
+	struct sk_buff *skb = pkt->skb;
+	int ret;
+
+	nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
+
+	ret = target->target(skb, &pkt->xt);
+
+	if (pkt->xt.hotdrop)
+		ret = NF_DROP;
+
+	switch(ret) {
+	case XT_CONTINUE:
+		data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+		break;
+	default:
+		data[NFT_REG_VERDICT].verdict = ret;
+		break;
+	}
+	return;
+}
+
+static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
+	[NFTA_TARGET_NAME]	= { .type = NLA_NUL_STRING },
+	[NFTA_TARGET_REV]	= { .type = NLA_U32 },
+	[NFTA_TARGET_INFO]	= { .type = NLA_BINARY },
+};
+
+static void
+nft_target_set_tgchk_param(struct xt_tgchk_param *par,
+			   const struct nft_ctx *ctx,
+			   struct xt_target *target, void *info,
+			   union nft_entry *entry, u8 proto, bool inv)
+{
+	par->net	= &init_net;
+	par->table	= ctx->table->name;
+	switch (ctx->afi->family) {
+	case AF_INET:
+		entry->e4.ip.proto = proto;
+		entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+		break;
+	case AF_INET6:
+		entry->e6.ipv6.proto = proto;
+		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
+		break;
+	}
+	par->entryinfo	= entry;
+	par->target	= target;
+	par->targinfo	= info;
+	if (ctx->chain->flags & NFT_BASE_CHAIN) {
+		const struct nft_base_chain *basechain =
+						nft_base_chain(ctx->chain);
+		const struct nf_hook_ops *ops = &basechain->ops;
+
+		par->hook_mask = 1 << ops->hooknum;
+	}
+	par->family	= ctx->afi->family;
+}
+
+static void target_compat_from_user(struct xt_target *t, void *in, void *out)
+{
+#ifdef CONFIG_COMPAT
+	if (t->compat_from_user) {
+		int pad;
+
+		t->compat_from_user(out, in);
+		pad = XT_ALIGN(t->targetsize) - t->targetsize;
+		if (pad > 0)
+			memset(out + t->targetsize, 0, pad);
+	} else
+#endif
+		memcpy(out, in, XT_ALIGN(t->targetsize));
+}
+
+static inline int nft_compat_target_offset(struct xt_target *target)
+{
+#ifdef CONFIG_COMPAT
+	return xt_compat_target_offset(target);
+#else
+	return 0;
+#endif
+}
+
+static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = {
+	[NFTA_RULE_COMPAT_PROTO]	= { .type = NLA_U32 },
+	[NFTA_RULE_COMPAT_FLAGS]	= { .type = NLA_U32 },
+};
+
+static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv)
+{
+	struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+	u32 flags;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr,
+			       nft_rule_compat_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFTA_RULE_COMPAT_PROTO] || !tb[NFTA_RULE_COMPAT_FLAGS])
+		return -EINVAL;
+
+	flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
+	if (flags & ~NFT_RULE_COMPAT_F_MASK)
+		return -EINVAL;
+	if (flags & NFT_RULE_COMPAT_F_INV)
+		*inv = true;
+
+	*proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+	return 0;
+}
+
+static int
+nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		const struct nlattr * const tb[])
+{
+	void *info = nft_expr_priv(expr);
+	struct xt_target *target = expr->ops->data;
+	struct xt_tgchk_param par;
+	size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
+	u8 proto = 0;
+	bool inv = false;
+	union nft_entry e = {};
+	int ret;
+
+	target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
+
+	if (ctx->nla[NFTA_RULE_COMPAT]) {
+		ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
+		if (ret < 0)
+			goto err;
+	}
+
+	nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
+
+	ret = xt_check_target(&par, size, proto, inv);
+	if (ret < 0)
+		goto err;
+
+	/* The standard target cannot be used */
+	if (target->target == NULL) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	return 0;
+err:
+	module_put(target->me);
+	return ret;
+}
+
+static void
+nft_target_destroy(const struct nft_expr *expr)
+{
+	struct xt_target *target = expr->ops->data;
+
+	module_put(target->me);
+}
+
+static int
+target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in)
+{
+	int ret;
+
+#ifdef CONFIG_COMPAT
+	if (t->compat_to_user) {
+		mm_segment_t old_fs;
+		void *out;
+
+		out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC);
+		if (out == NULL)
+			return -ENOMEM;
+
+		/* We want to reuse existing compat_to_user */
+		old_fs = get_fs();
+		set_fs(KERNEL_DS);
+		t->compat_to_user(out, in);
+		set_fs(old_fs);
+		ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out);
+		kfree(out);
+	} else
+#endif
+		ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in);
+
+	return ret;
+}
+
+static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct xt_target *target = expr->ops->data;
+	void *info = nft_expr_priv(expr);
+
+	if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) ||
+	    nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) ||
+	    target_dump_info(skb, target, info))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static int nft_target_validate(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr,
+			       const struct nft_data **data)
+{
+	struct xt_target *target = expr->ops->data;
+	unsigned int hook_mask = 0;
+
+	if (ctx->chain->flags & NFT_BASE_CHAIN) {
+		const struct nft_base_chain *basechain =
+						nft_base_chain(ctx->chain);
+		const struct nf_hook_ops *ops = &basechain->ops;
+
+		hook_mask = 1 << ops->hooknum;
+		if (hook_mask & target->hooks)
+			return 0;
+
+		/* This target is being called from an invalid chain */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void nft_match_eval(const struct nft_expr *expr,
+			   struct nft_data data[NFT_REG_MAX + 1],
+			   const struct nft_pktinfo *pkt)
+{
+	void *info = nft_expr_priv(expr);
+	struct xt_match *match = expr->ops->data;
+	struct sk_buff *skb = pkt->skb;
+	bool ret;
+
+	nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info);
+
+	ret = match->match(skb, (struct xt_action_param *)&pkt->xt);
+
+	if (pkt->xt.hotdrop) {
+		data[NFT_REG_VERDICT].verdict = NF_DROP;
+		return;
+	}
+
+	switch(ret) {
+	case true:
+		data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+		break;
+	case false:
+		data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+		break;
+	}
+}
+
+static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
+	[NFTA_MATCH_NAME]	= { .type = NLA_NUL_STRING },
+	[NFTA_MATCH_REV]	= { .type = NLA_U32 },
+	[NFTA_MATCH_INFO]	= { .type = NLA_BINARY },
+};
+
+/* struct xt_mtchk_param and xt_tgchk_param look very similar */
+static void
+nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
+			  struct xt_match *match, void *info,
+			  union nft_entry *entry, u8 proto, bool inv)
+{
+	par->net	= &init_net;
+	par->table	= ctx->table->name;
+	switch (ctx->afi->family) {
+	case AF_INET:
+		entry->e4.ip.proto = proto;
+		entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+		break;
+	case AF_INET6:
+		entry->e6.ipv6.proto = proto;
+		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
+		break;
+	}
+	par->entryinfo	= entry;
+	par->match	= match;
+	par->matchinfo	= info;
+	if (ctx->chain->flags & NFT_BASE_CHAIN) {
+		const struct nft_base_chain *basechain =
+						nft_base_chain(ctx->chain);
+		const struct nf_hook_ops *ops = &basechain->ops;
+
+		par->hook_mask = 1 << ops->hooknum;
+	}
+	par->family	= ctx->afi->family;
+}
+
+static void match_compat_from_user(struct xt_match *m, void *in, void *out)
+{
+#ifdef CONFIG_COMPAT
+	if (m->compat_from_user) {
+		int pad;
+
+		m->compat_from_user(out, in);
+		pad = XT_ALIGN(m->matchsize) - m->matchsize;
+		if (pad > 0)
+			memset(out + m->matchsize, 0, pad);
+	} else
+#endif
+		memcpy(out, in, XT_ALIGN(m->matchsize));
+}
+
+static int
+nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		const struct nlattr * const tb[])
+{
+	void *info = nft_expr_priv(expr);
+	struct xt_match *match = expr->ops->data;
+	struct xt_mtchk_param par;
+	size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
+	u8 proto = 0;
+	bool inv = false;
+	union nft_entry e = {};
+	int ret;
+
+	match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
+
+	if (ctx->nla[NFTA_RULE_COMPAT]) {
+		ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
+		if (ret < 0)
+			goto err;
+	}
+
+	nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
+
+	ret = xt_check_match(&par, size, proto, inv);
+	if (ret < 0)
+		goto err;
+
+	return 0;
+err:
+	module_put(match->me);
+	return ret;
+}
+
+static void
+nft_match_destroy(const struct nft_expr *expr)
+{
+	struct xt_match *match = expr->ops->data;
+
+	module_put(match->me);
+}
+
+static int
+match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in)
+{
+	int ret;
+
+#ifdef CONFIG_COMPAT
+	if (m->compat_to_user) {
+		mm_segment_t old_fs;
+		void *out;
+
+		out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC);
+		if (out == NULL)
+			return -ENOMEM;
+
+		/* We want to reuse existing compat_to_user */
+		old_fs = get_fs();
+		set_fs(KERNEL_DS);
+		m->compat_to_user(out, in);
+		set_fs(old_fs);
+		ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out);
+		kfree(out);
+	} else
+#endif
+		ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in);
+
+	return ret;
+}
+
+static inline int nft_compat_match_offset(struct xt_match *match)
+{
+#ifdef CONFIG_COMPAT
+	return xt_compat_match_offset(match);
+#else
+	return 0;
+#endif
+}
+
+static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	void *info = nft_expr_priv(expr);
+	struct xt_match *match = expr->ops->data;
+
+	if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
+	    nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) ||
+	    match_dump_info(skb, match, info))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static int nft_match_validate(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nft_data **data)
+{
+	struct xt_match *match = expr->ops->data;
+	unsigned int hook_mask = 0;
+
+	if (ctx->chain->flags & NFT_BASE_CHAIN) {
+		const struct nft_base_chain *basechain =
+						nft_base_chain(ctx->chain);
+		const struct nf_hook_ops *ops = &basechain->ops;
+
+		hook_mask = 1 << ops->hooknum;
+		if (hook_mask & match->hooks)
+			return 0;
+
+		/* This match is being called from an invalid chain */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int
+nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
+		      int event, u16 family, const char *name,
+		      int rev, int target)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	unsigned int flags = portid ? NLM_F_MULTI : 0;
+
+	event |= NFNL_SUBSYS_NFT_COMPAT << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
+	if (nlh == NULL)
+		goto nlmsg_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family = family;
+	nfmsg->version = NFNETLINK_V0;
+	nfmsg->res_id = 0;
+
+	if (nla_put_string(skb, NFTA_COMPAT_NAME, name) ||
+	    nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) ||
+	    nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target)))
+		goto nla_put_failure;
+
+	nlmsg_end(skb, nlh);
+	return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -1;
+}
+
+static int
+nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
+		const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+	int ret = 0, target;
+	struct nfgenmsg *nfmsg;
+	const char *fmt;
+	const char *name;
+	u32 rev;
+	struct sk_buff *skb2;
+
+	if (tb[NFTA_COMPAT_NAME] == NULL ||
+	    tb[NFTA_COMPAT_REV] == NULL ||
+	    tb[NFTA_COMPAT_TYPE] == NULL)
+		return -EINVAL;
+
+	name = nla_data(tb[NFTA_COMPAT_NAME]);
+	rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
+	target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
+
+	nfmsg = nlmsg_data(nlh);
+
+	switch(nfmsg->nfgen_family) {
+	case AF_INET:
+		fmt = "ipt_%s";
+		break;
+	case AF_INET6:
+		fmt = "ip6t_%s";
+		break;
+	default:
+		pr_err("nft_compat: unsupported protocol %d\n",
+			nfmsg->nfgen_family);
+		return -EINVAL;
+	}
+
+	try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name,
+						 rev, target, &ret),
+						 fmt, name);
+
+	if (ret < 0)
+		return ret;
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb2 == NULL)
+		return -ENOMEM;
+
+	/* include the best revision for this extension in the message */
+	if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid,
+				  nlh->nlmsg_seq,
+				  NFNL_MSG_TYPE(nlh->nlmsg_type),
+				  NFNL_MSG_COMPAT_GET,
+				  nfmsg->nfgen_family,
+				  name, ret, target) <= 0) {
+		kfree_skb(skb2);
+		return -ENOSPC;
+	}
+
+	ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+				MSG_DONTWAIT);
+	if (ret > 0)
+		ret = 0;
+
+	return ret == -EAGAIN ? -ENOBUFS : ret;
+}
+
+static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
+	[NFTA_COMPAT_NAME]	= { .type = NLA_NUL_STRING,
+				    .len = NFT_COMPAT_NAME_MAX-1 },
+	[NFTA_COMPAT_REV]	= { .type = NLA_U32 },
+	[NFTA_COMPAT_TYPE]	= { .type = NLA_U32 },
+};
+
+static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = {
+	[NFNL_MSG_COMPAT_GET]		= { .call = nfnl_compat_get,
+					    .attr_count = NFTA_COMPAT_MAX,
+					    .policy = nfnl_compat_policy_get },
+};
+
+static const struct nfnetlink_subsystem nfnl_compat_subsys = {
+	.name		= "nft-compat",
+	.subsys_id	= NFNL_SUBSYS_NFT_COMPAT,
+	.cb_count	= NFNL_MSG_COMPAT_MAX,
+	.cb		= nfnl_nft_compat_cb,
+};
+
+static LIST_HEAD(nft_match_list);
+
+struct nft_xt {
+	struct list_head	head;
+	struct nft_expr_ops	ops;
+};
+
+static struct nft_expr_type nft_match_type;
+
+static const struct nft_expr_ops *
+nft_match_select_ops(const struct nft_ctx *ctx,
+		     const struct nlattr * const tb[])
+{
+	struct nft_xt *nft_match;
+	struct xt_match *match;
+	char *mt_name;
+	__u32 rev, family;
+
+	if (tb[NFTA_MATCH_NAME] == NULL ||
+	    tb[NFTA_MATCH_REV] == NULL ||
+	    tb[NFTA_MATCH_INFO] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	mt_name = nla_data(tb[NFTA_MATCH_NAME]);
+	rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
+	family = ctx->afi->family;
+
+	/* Re-use the existing match if it's already loaded. */
+	list_for_each_entry(nft_match, &nft_match_list, head) {
+		struct xt_match *match = nft_match->ops.data;
+
+		if (strcmp(match->name, mt_name) == 0 &&
+		    match->revision == rev && match->family == family)
+			return &nft_match->ops;
+	}
+
+	match = xt_request_find_match(family, mt_name, rev);
+	if (IS_ERR(match))
+		return ERR_PTR(-ENOENT);
+
+	/* This is the first time we use this match, allocate operations */
+	nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
+	if (nft_match == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	nft_match->ops.type = &nft_match_type;
+	nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) +
+					    nft_compat_match_offset(match));
+	nft_match->ops.eval = nft_match_eval;
+	nft_match->ops.init = nft_match_init;
+	nft_match->ops.destroy = nft_match_destroy;
+	nft_match->ops.dump = nft_match_dump;
+	nft_match->ops.validate = nft_match_validate;
+	nft_match->ops.data = match;
+
+	list_add(&nft_match->head, &nft_match_list);
+
+	return &nft_match->ops;
+}
+
+static void nft_match_release(void)
+{
+	struct nft_xt *nft_match, *tmp;
+
+	list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head)
+		kfree(nft_match);
+}
+
+static struct nft_expr_type nft_match_type __read_mostly = {
+	.name		= "match",
+	.select_ops	= nft_match_select_ops,
+	.policy		= nft_match_policy,
+	.maxattr	= NFTA_MATCH_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static LIST_HEAD(nft_target_list);
+
+static struct nft_expr_type nft_target_type;
+
+static const struct nft_expr_ops *
+nft_target_select_ops(const struct nft_ctx *ctx,
+		      const struct nlattr * const tb[])
+{
+	struct nft_xt *nft_target;
+	struct xt_target *target;
+	char *tg_name;
+	__u32 rev, family;
+
+	if (tb[NFTA_TARGET_NAME] == NULL ||
+	    tb[NFTA_TARGET_REV] == NULL ||
+	    tb[NFTA_TARGET_INFO] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	tg_name = nla_data(tb[NFTA_TARGET_NAME]);
+	rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
+	family = ctx->afi->family;
+
+	/* Re-use the existing target if it's already loaded. */
+	list_for_each_entry(nft_target, &nft_match_list, head) {
+		struct xt_target *target = nft_target->ops.data;
+
+		if (strcmp(target->name, tg_name) == 0 &&
+		    target->revision == rev && target->family == family)
+			return &nft_target->ops;
+	}
+
+	target = xt_request_find_target(family, tg_name, rev);
+	if (IS_ERR(target))
+		return ERR_PTR(-ENOENT);
+
+	/* This is the first time we use this target, allocate operations */
+	nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
+	if (nft_target == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	nft_target->ops.type = &nft_target_type;
+	nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) +
+					     nft_compat_target_offset(target));
+	nft_target->ops.eval = nft_target_eval;
+	nft_target->ops.init = nft_target_init;
+	nft_target->ops.destroy = nft_target_destroy;
+	nft_target->ops.dump = nft_target_dump;
+	nft_target->ops.validate = nft_target_validate;
+	nft_target->ops.data = target;
+
+	list_add(&nft_target->head, &nft_target_list);
+
+	return &nft_target->ops;
+}
+
+static void nft_target_release(void)
+{
+	struct nft_xt *nft_target, *tmp;
+
+	list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head)
+		kfree(nft_target);
+}
+
+static struct nft_expr_type nft_target_type __read_mostly = {
+	.name		= "target",
+	.select_ops	= nft_target_select_ops,
+	.policy		= nft_target_policy,
+	.maxattr	= NFTA_TARGET_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_compat_module_init(void)
+{
+	int ret;
+
+	ret = nft_register_expr(&nft_match_type);
+	if (ret < 0)
+		return ret;
+
+	ret = nft_register_expr(&nft_target_type);
+	if (ret < 0)
+		goto err_match;
+
+	ret = nfnetlink_subsys_register(&nfnl_compat_subsys);
+	if (ret < 0) {
+		pr_err("nft_compat: cannot register with nfnetlink.\n");
+		goto err_target;
+	}
+
+	pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>\n");
+
+	return ret;
+
+err_target:
+	nft_unregister_expr(&nft_target_type);
+err_match:
+	nft_unregister_expr(&nft_match_type);
+	return ret;
+}
+
+static void __exit nft_compat_module_exit(void)
+{
+	nfnetlink_subsys_unregister(&nfnl_compat_subsys);
+	nft_unregister_expr(&nft_target_type);
+	nft_unregister_expr(&nft_match_type);
+	nft_match_release();
+	nft_target_release();
+}
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
+
+module_init(nft_compat_module_init);
+module_exit(nft_compat_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("match");
+MODULE_ALIAS_NFT_EXPR("target");
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
new file mode 100644
index 0000000..c89ee48
--- /dev/null
+++ b/net/netfilter/nft_counter.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/seqlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_counter {
+	seqlock_t	lock;
+	u64		bytes;
+	u64		packets;
+};
+
+static void nft_counter_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	struct nft_counter *priv = nft_expr_priv(expr);
+
+	write_seqlock_bh(&priv->lock);
+	priv->bytes += pkt->skb->len;
+	priv->packets++;
+	write_sequnlock_bh(&priv->lock);
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_counter *priv = nft_expr_priv(expr);
+	unsigned int seq;
+	u64 bytes;
+	u64 packets;
+
+	do {
+		seq = read_seqbegin(&priv->lock);
+		bytes	= priv->bytes;
+		packets	= priv->packets;
+	} while (read_seqretry(&priv->lock, seq));
+
+	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes)))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+	[NFTA_COUNTER_PACKETS]	= { .type = NLA_U64 },
+	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
+};
+
+static int nft_counter_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_counter *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_COUNTER_PACKETS])
+	        priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+	if (tb[NFTA_COUNTER_BYTES])
+		priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+
+	seqlock_init(&priv->lock);
+	return 0;
+}
+
+static struct nft_expr_type nft_counter_type;
+static const struct nft_expr_ops nft_counter_ops = {
+	.type		= &nft_counter_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_counter)),
+	.eval		= nft_counter_eval,
+	.init		= nft_counter_init,
+	.dump		= nft_counter_dump,
+};
+
+static struct nft_expr_type nft_counter_type __read_mostly = {
+	.name		= "counter",
+	.ops		= &nft_counter_ops,
+	.policy		= nft_counter_policy,
+	.maxattr	= NFTA_COUNTER_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_counter_module_init(void)
+{
+	return nft_register_expr(&nft_counter_type);
+}
+
+static void __exit nft_counter_module_exit(void)
+{
+	nft_unregister_expr(&nft_counter_type);
+}
+
+module_init(nft_counter_module_init);
+module_exit(nft_counter_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("counter");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
new file mode 100644
index 0000000..955f4e6
--- /dev/null
+++ b/net/netfilter/nft_ct.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+
+struct nft_ct {
+	enum nft_ct_keys	key:8;
+	enum ip_conntrack_dir	dir:8;
+	enum nft_registers	dreg:8;
+	uint8_t			family;
+};
+
+static void nft_ct_eval(const struct nft_expr *expr,
+			struct nft_data data[NFT_REG_MAX + 1],
+			const struct nft_pktinfo *pkt)
+{
+	const struct nft_ct *priv = nft_expr_priv(expr);
+	struct nft_data *dest = &data[priv->dreg];
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	const struct nf_conn_help *help;
+	const struct nf_conntrack_tuple *tuple;
+	const struct nf_conntrack_helper *helper;
+	long diff;
+	unsigned int state;
+
+	ct = nf_ct_get(pkt->skb, &ctinfo);
+
+	switch (priv->key) {
+	case NFT_CT_STATE:
+		if (ct == NULL)
+			state = NF_CT_STATE_INVALID_BIT;
+		else if (nf_ct_is_untracked(ct))
+			state = NF_CT_STATE_UNTRACKED_BIT;
+		else
+			state = NF_CT_STATE_BIT(ctinfo);
+		dest->data[0] = state;
+		return;
+	}
+
+	if (ct == NULL)
+		goto err;
+
+	switch (priv->key) {
+	case NFT_CT_DIRECTION:
+		dest->data[0] = CTINFO2DIR(ctinfo);
+		return;
+	case NFT_CT_STATUS:
+		dest->data[0] = ct->status;
+		return;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	case NFT_CT_MARK:
+		dest->data[0] = ct->mark;
+		return;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+		dest->data[0] = ct->secmark;
+		return;
+#endif
+	case NFT_CT_EXPIRATION:
+		diff = (long)jiffies - (long)ct->timeout.expires;
+		if (diff < 0)
+			diff = 0;
+		dest->data[0] = jiffies_to_msecs(diff);
+		return;
+	case NFT_CT_HELPER:
+		if (ct->master == NULL)
+			goto err;
+		help = nfct_help(ct->master);
+		if (help == NULL)
+			goto err;
+		helper = rcu_dereference(help->helper);
+		if (helper == NULL)
+			goto err;
+		if (strlen(helper->name) >= sizeof(dest->data))
+			goto err;
+		strncpy((char *)dest->data, helper->name, sizeof(dest->data));
+		return;
+	}
+
+	tuple = &ct->tuplehash[priv->dir].tuple;
+	switch (priv->key) {
+	case NFT_CT_L3PROTOCOL:
+		dest->data[0] = nf_ct_l3num(ct);
+		return;
+	case NFT_CT_SRC:
+		memcpy(dest->data, tuple->src.u3.all,
+		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+		return;
+	case NFT_CT_DST:
+		memcpy(dest->data, tuple->dst.u3.all,
+		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+		return;
+	case NFT_CT_PROTOCOL:
+		dest->data[0] = nf_ct_protonum(ct);
+		return;
+	case NFT_CT_PROTO_SRC:
+		dest->data[0] = (__force __u16)tuple->src.u.all;
+		return;
+	case NFT_CT_PROTO_DST:
+		dest->data[0] = (__force __u16)tuple->dst.u.all;
+		return;
+	}
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
+	[NFTA_CT_DREG]		= { .type = NLA_U32 },
+	[NFTA_CT_KEY]		= { .type = NLA_U32 },
+	[NFTA_CT_DIRECTION]	= { .type = NLA_U8 },
+};
+
+static int nft_ct_init(const struct nft_ctx *ctx,
+		       const struct nft_expr *expr,
+		       const struct nlattr * const tb[])
+{
+	struct nft_ct *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_CT_DREG] == NULL ||
+	    tb[NFTA_CT_KEY] == NULL)
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+	if (tb[NFTA_CT_DIRECTION] != NULL) {
+		priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+		switch (priv->dir) {
+		case IP_CT_DIR_ORIGINAL:
+		case IP_CT_DIR_REPLY:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	switch (priv->key) {
+	case NFT_CT_STATE:
+	case NFT_CT_DIRECTION:
+	case NFT_CT_STATUS:
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	case NFT_CT_MARK:
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+#endif
+	case NFT_CT_EXPIRATION:
+	case NFT_CT_HELPER:
+		if (tb[NFTA_CT_DIRECTION] != NULL)
+			return -EINVAL;
+		break;
+	case NFT_CT_PROTOCOL:
+	case NFT_CT_SRC:
+	case NFT_CT_DST:
+	case NFT_CT_PROTO_SRC:
+	case NFT_CT_PROTO_DST:
+		if (tb[NFTA_CT_DIRECTION] == NULL)
+			return -EINVAL;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = nf_ct_l3proto_try_module_get(ctx->afi->family);
+	if (err < 0)
+		return err;
+	priv->family = ctx->afi->family;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		goto err1;
+
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		goto err1;
+	return 0;
+
+err1:
+	nf_ct_l3proto_module_put(ctx->afi->family);
+	return err;
+}
+
+static void nft_ct_destroy(const struct nft_expr *expr)
+{
+	struct nft_ct *priv = nft_expr_priv(expr);
+
+	nf_ct_l3proto_module_put(priv->family);
+}
+
+static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_ct *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_ct_type;
+static const struct nft_expr_ops nft_ct_ops = {
+	.type		= &nft_ct_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+	.eval		= nft_ct_eval,
+	.init		= nft_ct_init,
+	.destroy	= nft_ct_destroy,
+	.dump		= nft_ct_dump,
+};
+
+static struct nft_expr_type nft_ct_type __read_mostly = {
+	.name		= "ct",
+	.ops		= &nft_ct_ops,
+	.policy		= nft_ct_policy,
+	.maxattr	= NFTA_CT_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_ct_module_init(void)
+{
+	return nft_register_expr(&nft_ct_type);
+}
+
+static void __exit nft_ct_module_exit(void)
+{
+	nft_unregister_expr(&nft_ct_type);
+}
+
+module_init(nft_ct_module_init);
+module_exit(nft_ct_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("ct");
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
new file mode 100644
index 0000000..b6eed4d
--- /dev/null
+++ b/net/netfilter/nft_expr_template.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_template {
+
+};
+
+static void nft_template_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1],
+			      const struct nft_pktinfo *pkt)
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
+	[NFTA_TEMPLATE_ATTR]		= { .type = NLA_U32 },
+};
+
+static int nft_template_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+	return 0;
+}
+
+static void nft_template_destroy(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr)
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_template *priv = nft_expr_priv(expr);
+
+	NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_template_type;
+static const struct nft_expr_ops nft_template_ops = {
+	.type		= &nft_template_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_template)),
+	.eval		= nft_template_eval,
+	.init		= nft_template_init,
+	.destroy	= nft_template_destroy,
+	.dump		= nft_template_dump,
+};
+
+static struct nft_expr_type nft_template_type __read_mostly = {
+	.name		= "template",
+	.ops		= &nft_template_ops,
+	.policy		= nft_template_policy,
+	.maxattr	= NFTA_TEMPLATE_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_template_module_init(void)
+{
+	return nft_register_expr(&nft_template_type);
+}
+
+static void __exit nft_template_module_exit(void)
+{
+	nft_unregister_expr(&nft_template_type);
+}
+
+module_init(nft_template_module_init);
+module_exit(nft_template_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
new file mode 100644
index 0000000..8e0bb75
--- /dev/null
+++ b/net/netfilter/nft_exthdr.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+// FIXME:
+#include <net/ipv6.h>
+
+struct nft_exthdr {
+	u8			type;
+	u8			offset;
+	u8			len;
+	enum nft_registers	dreg:8;
+};
+
+static void nft_exthdr_eval(const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1],
+			    const struct nft_pktinfo *pkt)
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	struct nft_data *dest = &data[priv->dreg];
+	unsigned int offset;
+	int err;
+
+	err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
+	if (err < 0)
+		goto err;
+	offset += priv->offset;
+
+	if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0)
+		goto err;
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
+	[NFTA_EXTHDR_DREG]		= { .type = NLA_U32 },
+	[NFTA_EXTHDR_TYPE]		= { .type = NLA_U8 },
+	[NFTA_EXTHDR_OFFSET]		= { .type = NLA_U32 },
+	[NFTA_EXTHDR_LEN]		= { .type = NLA_U32 },
+};
+
+static int nft_exthdr_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_EXTHDR_DREG] == NULL ||
+	    tb[NFTA_EXTHDR_TYPE] == NULL ||
+	    tb[NFTA_EXTHDR_OFFSET] == NULL ||
+	    tb[NFTA_EXTHDR_LEN] == NULL)
+		return -EINVAL;
+
+	priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+	priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+	priv->len    = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+	if (priv->len == 0 ||
+	    priv->len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_exthdr_type;
+static const struct nft_expr_ops nft_exthdr_ops = {
+	.type		= &nft_exthdr_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+	.eval		= nft_exthdr_eval,
+	.init		= nft_exthdr_init,
+	.dump		= nft_exthdr_dump,
+};
+
+static struct nft_expr_type nft_exthdr_type __read_mostly = {
+	.name		= "exthdr",
+	.ops		= &nft_exthdr_ops,
+	.policy		= nft_exthdr_policy,
+	.maxattr	= NFTA_EXTHDR_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_exthdr_module_init(void)
+{
+	return nft_register_expr(&nft_exthdr_type);
+}
+
+static void __exit nft_exthdr_module_exit(void)
+{
+	nft_unregister_expr(&nft_exthdr_type);
+}
+
+module_init(nft_exthdr_module_init);
+module_exit(nft_exthdr_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("exthdr");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644
index 0000000..3d3f8fc
--- /dev/null
+++ b/net/netfilter/nft_hash.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_hash {
+	struct hlist_head	*hash;
+	unsigned int		hsize;
+};
+
+struct nft_hash_elem {
+	struct hlist_node	hnode;
+	struct nft_data		key;
+	struct nft_data		data[];
+};
+
+static u32 nft_hash_rnd __read_mostly;
+static bool nft_hash_rnd_initted __read_mostly;
+
+static unsigned int nft_hash_data(const struct nft_data *data,
+				  unsigned int hsize, unsigned int len)
+{
+	unsigned int h;
+
+	h = jhash(data->data, len, nft_hash_rnd);
+	return ((u64)h * hsize) >> 32;
+}
+
+static bool nft_hash_lookup(const struct nft_set *set,
+			    const struct nft_data *key,
+			    struct nft_data *data)
+{
+	const struct nft_hash *priv = nft_set_priv(set);
+	const struct nft_hash_elem *he;
+	unsigned int h;
+
+	h = nft_hash_data(key, priv->hsize, set->klen);
+	hlist_for_each_entry(he, &priv->hash[h], hnode) {
+		if (nft_data_cmp(&he->key, key, set->klen))
+			continue;
+		if (set->flags & NFT_SET_MAP)
+			nft_data_copy(data, he->data);
+		return true;
+	}
+	return false;
+}
+
+static void nft_hash_elem_destroy(const struct nft_set *set,
+				  struct nft_hash_elem *he)
+{
+	nft_data_uninit(&he->key, NFT_DATA_VALUE);
+	if (set->flags & NFT_SET_MAP)
+		nft_data_uninit(he->data, set->dtype);
+	kfree(he);
+}
+
+static int nft_hash_insert(const struct nft_set *set,
+			   const struct nft_set_elem *elem)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	unsigned int size, h;
+
+	if (elem->flags != 0)
+		return -EINVAL;
+
+	size = sizeof(*he);
+	if (set->flags & NFT_SET_MAP)
+		size += sizeof(he->data[0]);
+
+	he = kzalloc(size, GFP_KERNEL);
+	if (he == NULL)
+		return -ENOMEM;
+
+	nft_data_copy(&he->key, &elem->key);
+	if (set->flags & NFT_SET_MAP)
+		nft_data_copy(he->data, &elem->data);
+
+	h = nft_hash_data(&he->key, priv->hsize, set->klen);
+	hlist_add_head_rcu(&he->hnode, &priv->hash[h]);
+	return 0;
+}
+
+static void nft_hash_remove(const struct nft_set *set,
+			    const struct nft_set_elem *elem)
+{
+	struct nft_hash_elem *he = elem->cookie;
+
+	hlist_del_rcu(&he->hnode);
+	kfree(he);
+}
+
+static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
+{
+	const struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	unsigned int h;
+
+	h = nft_hash_data(&elem->key, priv->hsize, set->klen);
+	hlist_for_each_entry(he, &priv->hash[h], hnode) {
+		if (nft_data_cmp(&he->key, &elem->key, set->klen))
+			continue;
+
+		elem->cookie = he;
+		elem->flags  = 0;
+		if (set->flags & NFT_SET_MAP)
+			nft_data_copy(&elem->data, he->data);
+		return 0;
+	}
+	return -ENOENT;
+}
+
+static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+			  struct nft_set_iter *iter)
+{
+	const struct nft_hash *priv = nft_set_priv(set);
+	const struct nft_hash_elem *he;
+	struct nft_set_elem elem;
+	unsigned int i;
+
+	for (i = 0; i < priv->hsize; i++) {
+		hlist_for_each_entry(he, &priv->hash[i], hnode) {
+			if (iter->count < iter->skip)
+				goto cont;
+
+			memcpy(&elem.key, &he->key, sizeof(elem.key));
+			if (set->flags & NFT_SET_MAP)
+				memcpy(&elem.data, he->data, sizeof(elem.data));
+			elem.flags = 0;
+
+			iter->err = iter->fn(ctx, set, iter, &elem);
+			if (iter->err < 0)
+				return;
+cont:
+			iter->count++;
+		}
+	}
+}
+
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+{
+	return sizeof(struct nft_hash);
+}
+
+static int nft_hash_init(const struct nft_set *set,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	unsigned int cnt, i;
+
+	if (unlikely(!nft_hash_rnd_initted)) {
+		get_random_bytes(&nft_hash_rnd, 4);
+		nft_hash_rnd_initted = true;
+	}
+
+	/* Aim for a load factor of 0.75 */
+	// FIXME: temporarily broken until we have set descriptions
+	cnt = 100;
+	cnt = cnt * 4 / 3;
+
+	priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
+	if (priv->hash == NULL)
+		return -ENOMEM;
+	priv->hsize = cnt;
+
+	for (i = 0; i < cnt; i++)
+		INIT_HLIST_HEAD(&priv->hash[i]);
+
+	return 0;
+}
+
+static void nft_hash_destroy(const struct nft_set *set)
+{
+	const struct nft_hash *priv = nft_set_priv(set);
+	const struct hlist_node *next;
+	struct nft_hash_elem *elem;
+	unsigned int i;
+
+	for (i = 0; i < priv->hsize; i++) {
+		hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) {
+			hlist_del(&elem->hnode);
+			nft_hash_elem_destroy(set, elem);
+		}
+	}
+	kfree(priv->hash);
+}
+
+static struct nft_set_ops nft_hash_ops __read_mostly = {
+	.privsize       = nft_hash_privsize,
+	.init		= nft_hash_init,
+	.destroy	= nft_hash_destroy,
+	.get		= nft_hash_get,
+	.insert		= nft_hash_insert,
+	.remove		= nft_hash_remove,
+	.lookup		= nft_hash_lookup,
+	.walk		= nft_hash_walk,
+	.features	= NFT_SET_MAP,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_hash_module_init(void)
+{
+	return nft_register_set(&nft_hash_ops);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+	nft_unregister_set(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
new file mode 100644
index 0000000..f169501
--- /dev/null
+++ b/net/netfilter/nft_immediate.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_immediate_expr {
+	struct nft_data		data;
+	enum nft_registers	dreg:8;
+	u8			dlen;
+};
+
+static void nft_immediate_eval(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	nft_data_copy(&data[priv->dreg], &priv->data);
+}
+
+static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
+	[NFTA_IMMEDIATE_DREG]	= { .type = NLA_U32 },
+	[NFTA_IMMEDIATE_DATA]	= { .type = NLA_NESTED },
+};
+
+static int nft_immediate_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_immediate_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc;
+	int err;
+
+	if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
+	    tb[NFTA_IMMEDIATE_DATA] == NULL)
+		return -EINVAL;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+
+	err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+	if (err < 0)
+		return err;
+	priv->dlen = desc.len;
+
+	err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
+	if (err < 0)
+		goto err1;
+
+	return 0;
+
+err1:
+	nft_data_uninit(&priv->data, desc.type);
+	return err;
+}
+
+static void nft_immediate_destroy(const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+	return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+
+	return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
+			     nft_dreg_to_type(priv->dreg), priv->dlen);
+
+nla_put_failure:
+	return -1;
+}
+
+static int nft_immediate_validate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr,
+				  const struct nft_data **data)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	if (priv->dreg == NFT_REG_VERDICT)
+		*data = &priv->data;
+
+	return 0;
+}
+
+static struct nft_expr_type nft_imm_type;
+static const struct nft_expr_ops nft_imm_ops = {
+	.type		= &nft_imm_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
+	.eval		= nft_immediate_eval,
+	.init		= nft_immediate_init,
+	.destroy	= nft_immediate_destroy,
+	.dump		= nft_immediate_dump,
+	.validate	= nft_immediate_validate,
+};
+
+static struct nft_expr_type nft_imm_type __read_mostly = {
+	.name		= "immediate",
+	.ops		= &nft_imm_ops,
+	.policy		= nft_immediate_policy,
+	.maxattr	= NFTA_IMMEDIATE_MAX,
+	.owner		= THIS_MODULE,
+};
+
+int __init nft_immediate_module_init(void)
+{
+	return nft_register_expr(&nft_imm_type);
+}
+
+void nft_immediate_module_exit(void)
+{
+	nft_unregister_expr(&nft_imm_type);
+}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
new file mode 100644
index 0000000..85da5bd
--- /dev/null
+++ b/net/netfilter/nft_limit.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+static DEFINE_SPINLOCK(limit_lock);
+
+struct nft_limit {
+	u64		tokens;
+	u64		rate;
+	u64		unit;
+	unsigned long	stamp;
+};
+
+static void nft_limit_eval(const struct nft_expr *expr,
+			   struct nft_data data[NFT_REG_MAX + 1],
+			   const struct nft_pktinfo *pkt)
+{
+	struct nft_limit *priv = nft_expr_priv(expr);
+
+	spin_lock_bh(&limit_lock);
+	if (time_after_eq(jiffies, priv->stamp)) {
+		priv->tokens = priv->rate;
+		priv->stamp = jiffies + priv->unit * HZ;
+	}
+
+	if (priv->tokens >= 1) {
+		priv->tokens--;
+		spin_unlock_bh(&limit_lock);
+		return;
+	}
+	spin_unlock_bh(&limit_lock);
+
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+	[NFTA_LIMIT_RATE]	= { .type = NLA_U64 },
+	[NFTA_LIMIT_UNIT]	= { .type = NLA_U64 },
+};
+
+static int nft_limit_init(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
+{
+	struct nft_limit *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_LIMIT_RATE] == NULL ||
+	    tb[NFTA_LIMIT_UNIT] == NULL)
+		return -EINVAL;
+
+	priv->rate   = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+	priv->unit   = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
+	priv->stamp  = jiffies + priv->unit * HZ;
+	priv->tokens = priv->rate;
+	return 0;
+}
+
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_limit *priv = nft_expr_priv(expr);
+
+	if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_limit_type;
+static const struct nft_expr_ops nft_limit_ops = {
+	.type		= &nft_limit_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit)),
+	.eval		= nft_limit_eval,
+	.init		= nft_limit_init,
+	.dump		= nft_limit_dump,
+};
+
+static struct nft_expr_type nft_limit_type __read_mostly = {
+	.name		= "limit",
+	.ops		= &nft_limit_ops,
+	.policy		= nft_limit_policy,
+	.maxattr	= NFTA_LIMIT_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_limit_module_init(void)
+{
+	return nft_register_expr(&nft_limit_type);
+}
+
+static void __exit nft_limit_module_exit(void)
+{
+	nft_unregister_expr(&nft_limit_type);
+}
+
+module_init(nft_limit_module_init);
+module_exit(nft_limit_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("limit");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
new file mode 100644
index 0000000..57cad07
--- /dev/null
+++ b/net/netfilter/nft_log.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netdevice.h>
+
+static const char *nft_log_null_prefix = "";
+
+struct nft_log {
+	struct nf_loginfo	loginfo;
+	char			*prefix;
+	int			family;
+};
+
+static void nft_log_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_log *priv = nft_expr_priv(expr);
+	struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+	nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in,
+		      pkt->out, &priv->loginfo, "%s", priv->prefix);
+}
+
+static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
+	[NFTA_LOG_GROUP]	= { .type = NLA_U16 },
+	[NFTA_LOG_PREFIX]	= { .type = NLA_STRING },
+	[NFTA_LOG_SNAPLEN]	= { .type = NLA_U32 },
+	[NFTA_LOG_QTHRESHOLD]	= { .type = NLA_U16 },
+};
+
+static int nft_log_init(const struct nft_ctx *ctx,
+			const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_log *priv = nft_expr_priv(expr);
+	struct nf_loginfo *li = &priv->loginfo;
+	const struct nlattr *nla;
+
+	priv->family = ctx->afi->family;
+
+	nla = tb[NFTA_LOG_PREFIX];
+	if (nla != NULL) {
+		priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
+		if (priv->prefix == NULL)
+			return -ENOMEM;
+		nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1);
+	} else
+		priv->prefix = (char *)nft_log_null_prefix;
+
+	li->type = NF_LOG_TYPE_ULOG;
+	if (tb[NFTA_LOG_GROUP] != NULL)
+		li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
+
+	if (tb[NFTA_LOG_SNAPLEN] != NULL)
+		li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
+	if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
+		li->u.ulog.qthreshold =
+			ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+	}
+
+	return 0;
+}
+
+static void nft_log_destroy(const struct nft_expr *expr)
+{
+	struct nft_log *priv = nft_expr_priv(expr);
+
+	if (priv->prefix != nft_log_null_prefix)
+		kfree(priv->prefix);
+}
+
+static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_log *priv = nft_expr_priv(expr);
+	const struct nf_loginfo *li = &priv->loginfo;
+
+	if (priv->prefix != nft_log_null_prefix)
+		if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
+			goto nla_put_failure;
+	if (li->u.ulog.group)
+		if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
+			goto nla_put_failure;
+	if (li->u.ulog.copy_len)
+		if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
+				 htonl(li->u.ulog.copy_len)))
+			goto nla_put_failure;
+	if (li->u.ulog.qthreshold)
+		if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+				 htons(li->u.ulog.qthreshold)))
+			goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_log_type;
+static const struct nft_expr_ops nft_log_ops = {
+	.type		= &nft_log_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_log)),
+	.eval		= nft_log_eval,
+	.init		= nft_log_init,
+	.destroy	= nft_log_destroy,
+	.dump		= nft_log_dump,
+};
+
+static struct nft_expr_type nft_log_type __read_mostly = {
+	.name		= "log",
+	.ops		= &nft_log_ops,
+	.policy		= nft_log_policy,
+	.maxattr	= NFTA_LOG_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_log_module_init(void)
+{
+	return nft_register_expr(&nft_log_type);
+}
+
+static void __exit nft_log_module_exit(void)
+{
+	nft_unregister_expr(&nft_log_type);
+}
+
+module_init(nft_log_module_init);
+module_exit(nft_log_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("log");
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
new file mode 100644
index 0000000..8a6116b
--- /dev/null
+++ b/net/netfilter/nft_lookup.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_lookup {
+	struct nft_set			*set;
+	enum nft_registers		sreg:8;
+	enum nft_registers		dreg:8;
+	struct nft_set_binding		binding;
+};
+
+static void nft_lookup_eval(const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1],
+			    const struct nft_pktinfo *pkt)
+{
+	const struct nft_lookup *priv = nft_expr_priv(expr);
+	const struct nft_set *set = priv->set;
+
+	if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg]))
+		return;
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
+	[NFTA_LOOKUP_SET]	= { .type = NLA_STRING },
+	[NFTA_LOOKUP_SREG]	= { .type = NLA_U32 },
+	[NFTA_LOOKUP_DREG]	= { .type = NLA_U32 },
+};
+
+static int nft_lookup_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
+	struct nft_set *set;
+	int err;
+
+	if (tb[NFTA_LOOKUP_SET] == NULL ||
+	    tb[NFTA_LOOKUP_SREG] == NULL)
+		return -EINVAL;
+
+	set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_LOOKUP_DREG] != NULL) {
+		if (!(set->flags & NFT_SET_MAP))
+			return -EINVAL;
+
+		priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG]));
+		err = nft_validate_output_register(priv->dreg);
+		if (err < 0)
+			return err;
+
+		if (priv->dreg == NFT_REG_VERDICT) {
+			if (set->dtype != NFT_DATA_VERDICT)
+				return -EINVAL;
+		} else if (set->dtype == NFT_DATA_VERDICT)
+			return -EINVAL;
+	} else if (set->flags & NFT_SET_MAP)
+		return -EINVAL;
+
+	err = nf_tables_bind_set(ctx, set, &priv->binding);
+	if (err < 0)
+		return err;
+
+	priv->set = set;
+	return 0;
+}
+
+static void nft_lookup_destroy(const struct nft_expr *expr)
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
+
+	nf_tables_unbind_set(NULL, priv->set, &priv->binding);
+}
+
+static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_lookup *priv = nft_expr_priv(expr);
+
+	if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (priv->set->flags & NFT_SET_MAP)
+		if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg)))
+			goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_lookup_type;
+static const struct nft_expr_ops nft_lookup_ops = {
+	.type		= &nft_lookup_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
+	.eval		= nft_lookup_eval,
+	.init		= nft_lookup_init,
+	.destroy	= nft_lookup_destroy,
+	.dump		= nft_lookup_dump,
+};
+
+static struct nft_expr_type nft_lookup_type __read_mostly = {
+	.name		= "lookup",
+	.ops		= &nft_lookup_ops,
+	.policy		= nft_lookup_policy,
+	.maxattr	= NFTA_LOOKUP_MAX,
+	.owner		= THIS_MODULE,
+};
+
+int __init nft_lookup_module_init(void)
+{
+	return nft_register_expr(&nft_lookup_type);
+}
+
+void nft_lookup_module_exit(void)
+{
+	nft_unregister_expr(&nft_lookup_type);
+}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
new file mode 100644
index 0000000..8c28220
--- /dev/null
+++ b/net/netfilter/nft_meta.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
+#include <net/netfilter/nf_tables.h>
+
+struct nft_meta {
+	enum nft_meta_keys	key:8;
+	enum nft_registers	dreg:8;
+};
+
+static void nft_meta_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	const struct net_device *in = pkt->in, *out = pkt->out;
+	struct nft_data *dest = &data[priv->dreg];
+
+	switch (priv->key) {
+	case NFT_META_LEN:
+		dest->data[0] = skb->len;
+		break;
+	case NFT_META_PROTOCOL:
+		*(__be16 *)dest->data = skb->protocol;
+		break;
+	case NFT_META_PRIORITY:
+		dest->data[0] = skb->priority;
+		break;
+	case NFT_META_MARK:
+		dest->data[0] = skb->mark;
+		break;
+	case NFT_META_IIF:
+		if (in == NULL)
+			goto err;
+		dest->data[0] = in->ifindex;
+		break;
+	case NFT_META_OIF:
+		if (out == NULL)
+			goto err;
+		dest->data[0] = out->ifindex;
+		break;
+	case NFT_META_IIFNAME:
+		if (in == NULL)
+			goto err;
+		strncpy((char *)dest->data, in->name, sizeof(dest->data));
+		break;
+	case NFT_META_OIFNAME:
+		if (out == NULL)
+			goto err;
+		strncpy((char *)dest->data, out->name, sizeof(dest->data));
+		break;
+	case NFT_META_IIFTYPE:
+		if (in == NULL)
+			goto err;
+		*(u16 *)dest->data = in->type;
+		break;
+	case NFT_META_OIFTYPE:
+		if (out == NULL)
+			goto err;
+		*(u16 *)dest->data = out->type;
+		break;
+	case NFT_META_SKUID:
+		if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+			goto err;
+
+		read_lock_bh(&skb->sk->sk_callback_lock);
+		if (skb->sk->sk_socket == NULL ||
+		    skb->sk->sk_socket->file == NULL) {
+			read_unlock_bh(&skb->sk->sk_callback_lock);
+			goto err;
+		}
+
+		dest->data[0] =
+			from_kuid_munged(&init_user_ns,
+				skb->sk->sk_socket->file->f_cred->fsuid);
+		read_unlock_bh(&skb->sk->sk_callback_lock);
+		break;
+	case NFT_META_SKGID:
+		if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+			goto err;
+
+		read_lock_bh(&skb->sk->sk_callback_lock);
+		if (skb->sk->sk_socket == NULL ||
+		    skb->sk->sk_socket->file == NULL) {
+			read_unlock_bh(&skb->sk->sk_callback_lock);
+			goto err;
+		}
+		dest->data[0] =
+			from_kgid_munged(&init_user_ns,
+				 skb->sk->sk_socket->file->f_cred->fsgid);
+		read_unlock_bh(&skb->sk->sk_callback_lock);
+		break;
+#ifdef CONFIG_NET_CLS_ROUTE
+	case NFT_META_RTCLASSID: {
+		const struct dst_entry *dst = skb_dst(skb);
+
+		if (dst == NULL)
+			goto err;
+		dest->data[0] = dst->tclassid;
+		break;
+	}
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+		dest->data[0] = skb->secmark;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		goto err;
+	}
+	return;
+
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+	[NFTA_META_DREG]	= { .type = NLA_U32 },
+	[NFTA_META_KEY]		= { .type = NLA_U32 },
+};
+
+static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_meta *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_META_DREG] == NULL ||
+	    tb[NFTA_META_KEY] == NULL)
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (priv->key) {
+	case NFT_META_LEN:
+	case NFT_META_PROTOCOL:
+	case NFT_META_PRIORITY:
+	case NFT_META_MARK:
+	case NFT_META_IIF:
+	case NFT_META_OIF:
+	case NFT_META_IIFNAME:
+	case NFT_META_OIFNAME:
+	case NFT_META_IIFTYPE:
+	case NFT_META_OIFTYPE:
+	case NFT_META_SKUID:
+	case NFT_META_SKGID:
+#ifdef CONFIG_NET_CLS_ROUTE
+	case NFT_META_RTCLASSID:
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+#endif
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_meta_type;
+static const struct nft_expr_ops nft_meta_ops = {
+	.type		= &nft_meta_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.eval		= nft_meta_eval,
+	.init		= nft_meta_init,
+	.dump		= nft_meta_dump,
+};
+
+static struct nft_expr_type nft_meta_type __read_mostly = {
+	.name		= "meta",
+	.ops		= &nft_meta_ops,
+	.policy		= nft_meta_policy,
+	.maxattr	= NFTA_META_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_meta_module_init(void)
+{
+	return nft_register_expr(&nft_meta_type);
+}
+
+static void __exit nft_meta_module_exit(void)
+{
+	nft_unregister_expr(&nft_meta_type);
+}
+
+module_init(nft_meta_module_init);
+module_exit(nft_meta_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c
new file mode 100644
index 0000000..71177df
--- /dev/null
+++ b/net/netfilter/nft_meta_target.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_meta {
+	enum nft_meta_keys	key;
+};
+
+static void nft_meta_eval(const struct nft_expr *expr,
+			  struct nft_data *nfres,
+			  struct nft_data *data,
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *meta = nft_expr_priv(expr);
+	struct sk_buff *skb = pkt->skb;
+	u32 val = data->data[0];
+
+	switch (meta->key) {
+	case NFT_META_MARK:
+		skb->mark = val;
+		break;
+	case NFT_META_PRIORITY:
+		skb->priority = val;
+		break;
+	case NFT_META_NFTRACE:
+		skb->nf_trace = val;
+		break;
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+		skb->secmark = val;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+	}
+}
+
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+	[NFTA_META_KEY]		= { .type = NLA_U32 },
+};
+
+static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[])
+{
+	struct nft_meta *meta = nft_expr_priv(expr);
+
+	if (tb[NFTA_META_KEY] == NULL)
+		return -EINVAL;
+
+	meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (meta->key) {
+	case NFT_META_MARK:
+	case NFT_META_PRIORITY:
+	case NFT_META_NFTRACE:
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+#endif
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_meta *meta = nft_expr_priv(expr);
+
+	NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key));
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops meta_target __read_mostly = {
+	.name		= "meta",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_meta_eval,
+	.init		= nft_meta_init,
+	.dump		= nft_meta_dump,
+	.policy		= nft_meta_policy,
+	.maxattr	= NFTA_META_MAX,
+};
+
+static int __init nft_meta_target_init(void)
+{
+	return nft_register_expr(&meta_target);
+}
+
+static void __exit nft_meta_target_exit(void)
+{
+	nft_unregister_expr(&meta_target);
+}
+
+module_init(nft_meta_target_init);
+module_exit(nft_meta_target_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
new file mode 100644
index 0000000..d3b1ffe
--- /dev/null
+++ b/net/netfilter/nft_nat.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/string.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+struct nft_nat {
+	enum nft_registers      sreg_addr_min:8;
+	enum nft_registers      sreg_addr_max:8;
+	enum nft_registers      sreg_proto_min:8;
+	enum nft_registers      sreg_proto_max:8;
+	int                     family;
+	enum nf_nat_manip_type  type;
+};
+
+static void nft_nat_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_nat *priv = nft_expr_priv(expr);
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
+	struct nf_nat_range range;
+
+	memset(&range, 0, sizeof(range));
+	if (priv->sreg_addr_min) {
+		if (priv->family == AF_INET) {
+			range.min_addr.ip = (__force __be32)
+					data[priv->sreg_addr_min].data[0];
+			range.max_addr.ip = (__force __be32)
+					data[priv->sreg_addr_max].data[0];
+
+		} else {
+			memcpy(range.min_addr.ip6,
+			       data[priv->sreg_addr_min].data,
+			       sizeof(struct nft_data));
+			memcpy(range.max_addr.ip6,
+			       data[priv->sreg_addr_max].data,
+			       sizeof(struct nft_data));
+		}
+		range.flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = (__force __be16)
+					data[priv->sreg_proto_min].data[0];
+		range.max_proto.all = (__force __be16)
+					data[priv->sreg_proto_max].data[0];
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+
+	data[NFT_REG_VERDICT].verdict =
+		nf_nat_setup_info(ct, &range, priv->type);
+}
+
+static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
+	[NFTA_NAT_TYPE]		 = { .type = NLA_U32 },
+	[NFTA_NAT_FAMILY]	 = { .type = NLA_U32 },
+	[NFTA_NAT_REG_ADDR_MIN]	 = { .type = NLA_U32 },
+	[NFTA_NAT_REG_ADDR_MAX]	 = { .type = NLA_U32 },
+	[NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
+	[NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
+};
+
+static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_nat *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_NAT_TYPE] == NULL)
+		return -EINVAL;
+
+	switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
+	case NFT_NAT_SNAT:
+		priv->type = NF_NAT_MANIP_SRC;
+		break;
+	case NFT_NAT_DNAT:
+		priv->type = NF_NAT_MANIP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (tb[NFTA_NAT_FAMILY] == NULL)
+		return -EINVAL;
+
+	priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
+	if (priv->family != AF_INET && priv->family != AF_INET6)
+		return -EINVAL;
+
+	if (tb[NFTA_NAT_REG_ADDR_MIN]) {
+		priv->sreg_addr_min = ntohl(nla_get_be32(
+						tb[NFTA_NAT_REG_ADDR_MIN]));
+		err = nft_validate_input_register(priv->sreg_addr_min);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[NFTA_NAT_REG_ADDR_MAX]) {
+		priv->sreg_addr_max = ntohl(nla_get_be32(
+						tb[NFTA_NAT_REG_ADDR_MAX]));
+		err = nft_validate_input_register(priv->sreg_addr_max);
+		if (err < 0)
+			return err;
+	} else
+		priv->sreg_addr_max = priv->sreg_addr_min;
+
+	if (tb[NFTA_NAT_REG_PROTO_MIN]) {
+		priv->sreg_proto_min = ntohl(nla_get_be32(
+						tb[NFTA_NAT_REG_PROTO_MIN]));
+		err = nft_validate_input_register(priv->sreg_proto_min);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[NFTA_NAT_REG_PROTO_MAX]) {
+		priv->sreg_proto_max = ntohl(nla_get_be32(
+						tb[NFTA_NAT_REG_PROTO_MAX]));
+		err = nft_validate_input_register(priv->sreg_proto_max);
+		if (err < 0)
+			return err;
+	} else
+		priv->sreg_proto_max = priv->sreg_proto_min;
+
+	return 0;
+}
+
+static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_nat *priv = nft_expr_priv(expr);
+
+	switch (priv->type) {
+	case NF_NAT_MANIP_SRC:
+		if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT)))
+			goto nla_put_failure;
+		break;
+	case NF_NAT_MANIP_DST:
+		if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT)))
+			goto nla_put_failure;
+		break;
+	}
+
+	if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb,
+			 NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb,
+			 NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb,
+			 NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb,
+			 NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_nat_type;
+static const struct nft_expr_ops nft_nat_ops = {
+	.type           = &nft_nat_type,
+	.size           = NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+	.eval           = nft_nat_eval,
+	.init           = nft_nat_init,
+	.dump           = nft_nat_dump,
+};
+
+static struct nft_expr_type nft_nat_type __read_mostly = {
+	.name           = "nat",
+	.ops            = &nft_nat_ops,
+	.policy         = nft_nat_policy,
+	.maxattr        = NFTA_NAT_MAX,
+	.owner          = THIS_MODULE,
+};
+
+static int __init nft_nat_module_init(void)
+{
+	int err;
+
+	err = nft_register_expr(&nft_nat_type);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static void __exit nft_nat_module_exit(void)
+{
+	nft_unregister_expr(&nft_nat_type);
+}
+
+module_init(nft_nat_module_init);
+module_exit(nft_nat_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_EXPR("nat");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
new file mode 100644
index 0000000..a2aeb31
--- /dev/null
+++ b/net/netfilter/nft_payload.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+static void nft_payload_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	struct nft_data *dest = &data[priv->dreg];
+	int offset;
+
+	switch (priv->base) {
+	case NFT_PAYLOAD_LL_HEADER:
+		if (!skb_mac_header_was_set(skb))
+			goto err;
+		offset = skb_mac_header(skb) - skb->data;
+		break;
+	case NFT_PAYLOAD_NETWORK_HEADER:
+		offset = skb_network_offset(skb);
+		break;
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		offset = pkt->xt.thoff;
+		break;
+	default:
+		BUG();
+	}
+	offset += priv->offset;
+
+	if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0)
+		goto err;
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
+	[NFTA_PAYLOAD_DREG]	= { .type = NLA_U32 },
+	[NFTA_PAYLOAD_BASE]	= { .type = NLA_U32 },
+	[NFTA_PAYLOAD_OFFSET]	= { .type = NLA_U32 },
+	[NFTA_PAYLOAD_LEN]	= { .type = NLA_U32 },
+};
+
+static int nft_payload_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_payload *priv = nft_expr_priv(expr);
+	int err;
+
+	priv->base   = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+	priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+	priv->len    = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) ||
+	    nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
+	    nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
+	    nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_payload_type;
+static const struct nft_expr_ops nft_payload_ops = {
+	.type		= &nft_payload_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+	.eval		= nft_payload_eval,
+	.init		= nft_payload_init,
+	.dump		= nft_payload_dump,
+};
+
+const struct nft_expr_ops nft_payload_fast_ops = {
+	.type		= &nft_payload_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+	.eval		= nft_payload_eval,
+	.init		= nft_payload_init,
+	.dump		= nft_payload_dump,
+};
+
+static const struct nft_expr_ops *
+nft_payload_select_ops(const struct nft_ctx *ctx,
+		       const struct nlattr * const tb[])
+{
+	enum nft_payload_bases base;
+	unsigned int offset, len;
+
+	if (tb[NFTA_PAYLOAD_DREG] == NULL ||
+	    tb[NFTA_PAYLOAD_BASE] == NULL ||
+	    tb[NFTA_PAYLOAD_OFFSET] == NULL ||
+	    tb[NFTA_PAYLOAD_LEN] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+	switch (base) {
+	case NFT_PAYLOAD_LL_HEADER:
+	case NFT_PAYLOAD_NETWORK_HEADER:
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		break;
+	default:
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+	len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+	if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
+		return ERR_PTR(-EINVAL);
+
+	if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER)
+		return &nft_payload_fast_ops;
+	else
+		return &nft_payload_ops;
+}
+
+static struct nft_expr_type nft_payload_type __read_mostly = {
+	.name		= "payload",
+	.select_ops	= nft_payload_select_ops,
+	.policy		= nft_payload_policy,
+	.maxattr	= NFTA_PAYLOAD_MAX,
+	.owner		= THIS_MODULE,
+};
+
+int __init nft_payload_module_init(void)
+{
+	return nft_register_expr(&nft_payload_type);
+}
+
+void nft_payload_module_exit(void)
+{
+	nft_unregister_expr(&nft_payload_type);
+}
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
new file mode 100644
index 0000000..ca0c1b2
--- /dev/null
+++ b/net/netfilter/nft_rbtree.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_rbtree {
+	struct rb_root		root;
+};
+
+struct nft_rbtree_elem {
+	struct rb_node		node;
+	u16			flags;
+	struct nft_data		key;
+	struct nft_data		data[];
+};
+
+static bool nft_rbtree_lookup(const struct nft_set *set,
+			      const struct nft_data *key,
+			      struct nft_data *data)
+{
+	const struct nft_rbtree *priv = nft_set_priv(set);
+	const struct nft_rbtree_elem *rbe, *interval = NULL;
+	const struct rb_node *parent = priv->root.rb_node;
+	int d;
+
+	while (parent != NULL) {
+		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+		d = nft_data_cmp(&rbe->key, key, set->klen);
+		if (d < 0) {
+			parent = parent->rb_left;
+			interval = rbe;
+		} else if (d > 0)
+			parent = parent->rb_right;
+		else {
+found:
+			if (rbe->flags & NFT_SET_ELEM_INTERVAL_END)
+				goto out;
+			if (set->flags & NFT_SET_MAP)
+				nft_data_copy(data, rbe->data);
+			return true;
+		}
+	}
+
+	if (set->flags & NFT_SET_INTERVAL && interval != NULL) {
+		rbe = interval;
+		goto found;
+	}
+out:
+	return false;
+}
+
+static void nft_rbtree_elem_destroy(const struct nft_set *set,
+				    struct nft_rbtree_elem *rbe)
+{
+	nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
+	if (set->flags & NFT_SET_MAP)
+		nft_data_uninit(rbe->data, set->dtype);
+	kfree(rbe);
+}
+
+static int __nft_rbtree_insert(const struct nft_set *set,
+			       struct nft_rbtree_elem *new)
+{
+	struct nft_rbtree *priv = nft_set_priv(set);
+	struct nft_rbtree_elem *rbe;
+	struct rb_node *parent, **p;
+	int d;
+
+	parent = NULL;
+	p = &priv->root.rb_node;
+	while (*p != NULL) {
+		parent = *p;
+		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+		d = nft_data_cmp(&rbe->key, &new->key, set->klen);
+		if (d < 0)
+			p = &parent->rb_left;
+		else if (d > 0)
+			p = &parent->rb_right;
+		else
+			return -EEXIST;
+	}
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, &priv->root);
+	return 0;
+}
+
+static int nft_rbtree_insert(const struct nft_set *set,
+			     const struct nft_set_elem *elem)
+{
+	struct nft_rbtree_elem *rbe;
+	unsigned int size;
+	int err;
+
+	size = sizeof(*rbe);
+	if (set->flags & NFT_SET_MAP)
+		size += sizeof(rbe->data[0]);
+
+	rbe = kzalloc(size, GFP_KERNEL);
+	if (rbe == NULL)
+		return -ENOMEM;
+
+	rbe->flags = elem->flags;
+	nft_data_copy(&rbe->key, &elem->key);
+	if (set->flags & NFT_SET_MAP)
+		nft_data_copy(rbe->data, &elem->data);
+
+	err = __nft_rbtree_insert(set, rbe);
+	if (err < 0)
+		kfree(rbe);
+	return err;
+}
+
+static void nft_rbtree_remove(const struct nft_set *set,
+			      const struct nft_set_elem *elem)
+{
+	struct nft_rbtree *priv = nft_set_priv(set);
+	struct nft_rbtree_elem *rbe = elem->cookie;
+
+	rb_erase(&rbe->node, &priv->root);
+	kfree(rbe);
+}
+
+static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
+{
+	const struct nft_rbtree *priv = nft_set_priv(set);
+	const struct rb_node *parent = priv->root.rb_node;
+	struct nft_rbtree_elem *rbe;
+	int d;
+
+	while (parent != NULL) {
+		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+		d = nft_data_cmp(&rbe->key, &elem->key, set->klen);
+		if (d < 0)
+			parent = parent->rb_left;
+		else if (d > 0)
+			parent = parent->rb_right;
+		else {
+			elem->cookie = rbe;
+			if (set->flags & NFT_SET_MAP)
+				nft_data_copy(&elem->data, rbe->data);
+			elem->flags = rbe->flags;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static void nft_rbtree_walk(const struct nft_ctx *ctx,
+			    const struct nft_set *set,
+			    struct nft_set_iter *iter)
+{
+	const struct nft_rbtree *priv = nft_set_priv(set);
+	const struct nft_rbtree_elem *rbe;
+	struct nft_set_elem elem;
+	struct rb_node *node;
+
+	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+		if (iter->count < iter->skip)
+			goto cont;
+
+		rbe = rb_entry(node, struct nft_rbtree_elem, node);
+		nft_data_copy(&elem.key, &rbe->key);
+		if (set->flags & NFT_SET_MAP)
+			nft_data_copy(&elem.data, rbe->data);
+		elem.flags = rbe->flags;
+
+		iter->err = iter->fn(ctx, set, iter, &elem);
+		if (iter->err < 0)
+			return;
+cont:
+		iter->count++;
+	}
+}
+
+static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
+{
+	return sizeof(struct nft_rbtree);
+}
+
+static int nft_rbtree_init(const struct nft_set *set,
+			   const struct nlattr * const nla[])
+{
+	struct nft_rbtree *priv = nft_set_priv(set);
+
+	priv->root = RB_ROOT;
+	return 0;
+}
+
+static void nft_rbtree_destroy(const struct nft_set *set)
+{
+	struct nft_rbtree *priv = nft_set_priv(set);
+	struct nft_rbtree_elem *rbe;
+	struct rb_node *node;
+
+	while ((node = priv->root.rb_node) != NULL) {
+		rb_erase(node, &priv->root);
+		rbe = rb_entry(node, struct nft_rbtree_elem, node);
+		nft_rbtree_elem_destroy(set, rbe);
+	}
+}
+
+static struct nft_set_ops nft_rbtree_ops __read_mostly = {
+	.privsize	= nft_rbtree_privsize,
+	.init		= nft_rbtree_init,
+	.destroy	= nft_rbtree_destroy,
+	.insert		= nft_rbtree_insert,
+	.remove		= nft_rbtree_remove,
+	.get		= nft_rbtree_get,
+	.lookup		= nft_rbtree_lookup,
+	.walk		= nft_rbtree_walk,
+	.features	= NFT_SET_INTERVAL | NFT_SET_MAP,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_rbtree_module_init(void)
+{
+	return nft_register_set(&nft_rbtree_ops);
+}
+
+static void __exit nft_rbtree_module_exit(void)
+{
+	nft_unregister_set(&nft_rbtree_ops);
+}
+
+module_init(nft_rbtree_module_init);
+module_exit(nft_rbtree_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8b03028..227aa11 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -845,8 +845,13 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 
-	table->private = newinfo;
 	newinfo->initial_entries = private->initial_entries;
+	/*
+	 * Ensure contents of newinfo are visible before assigning to
+	 * private.
+	 */
+	smp_wmb();
+	table->private = newinfo;
 
 	/*
 	 * Even though table entries have now been swapped, other CPU's
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index a60261c..da35ac0 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -26,6 +26,9 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
 	if (skb->nfct != NULL)
 		return XT_CONTINUE;
 
+	/* special case the untracked ct : we want the percpu object */
+	if (!ct)
+		ct = nf_ct_untracked_get();
 	atomic_inc(&ct->ct_general.use);
 	skb->nfct = &ct->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
@@ -186,8 +189,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 	int ret = -EOPNOTSUPP;
 
 	if (info->flags & XT_CT_NOTRACK) {
-		ct = nf_ct_untracked_get();
-		atomic_inc(&ct->ct_general.use);
+		ct = NULL;
 		goto out;
 	}
 
@@ -311,7 +313,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
 	struct nf_conn *ct = info->ct;
 	struct nf_conn_help *help;
 
-	if (!nf_ct_is_untracked(ct)) {
+	if (ct && !nf_ct_is_untracked(ct)) {
 		help = nfct_help(ct);
 		if (help)
 			module_put(help->helper->me);
@@ -319,8 +321,8 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
 		nf_ct_l3proto_module_put(par->family);
 
 		xt_ct_destroy_timeout(ct);
+		nf_ct_put(info->ct);
 	}
-	nf_ct_put(info->ct);
 }
 
 static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 1e2fae3..ed00fef 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -147,6 +147,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_NFQ_info_v3 *info = par->targinfo;
 	u32 queue = info->queuenum;
+	int ret;
 
 	if (info->queues_total > 1) {
 		if (info->flags & NFQ_FLAG_CPU_FANOUT) {
@@ -157,7 +158,11 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
 			queue = nfqueue_hash(skb, par);
 	}
 
-	return NF_QUEUE_NR(queue);
+	ret = NF_QUEUE_NR(queue);
+	if (info->flags & NFQ_FLAG_BYPASS)
+		ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+	return ret;
 }
 
 static struct xt_target nfqueue_tg_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 7011c71..e762de5 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -43,35 +43,74 @@ optlen(const u_int8_t *opt, unsigned int offset)
 		return opt[offset+1];
 }
 
+static u_int32_t tcpmss_reverse_mtu(struct net *net,
+				    const struct sk_buff *skb,
+				    unsigned int family)
+{
+	struct flowi fl;
+	const struct nf_afinfo *ai;
+	struct rtable *rt = NULL;
+	u_int32_t mtu     = ~0U;
+
+	if (family == PF_INET) {
+		struct flowi4 *fl4 = &fl.u.ip4;
+		memset(fl4, 0, sizeof(*fl4));
+		fl4->daddr = ip_hdr(skb)->saddr;
+	} else {
+		struct flowi6 *fl6 = &fl.u.ip6;
+
+		memset(fl6, 0, sizeof(*fl6));
+		fl6->daddr = ipv6_hdr(skb)->saddr;
+	}
+	rcu_read_lock();
+	ai = nf_get_afinfo(family);
+	if (ai != NULL)
+		ai->route(net, (struct dst_entry **)&rt, &fl, false);
+	rcu_read_unlock();
+
+	if (rt != NULL) {
+		mtu = dst_mtu(&rt->dst);
+		dst_release(&rt->dst);
+	}
+	return mtu;
+}
+
 static int
 tcpmss_mangle_packet(struct sk_buff *skb,
 		     const struct xt_action_param *par,
-		     unsigned int in_mtu,
+		     unsigned int family,
 		     unsigned int tcphoff,
 		     unsigned int minlen)
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	struct tcphdr *tcph;
-	unsigned int tcplen, i;
+	int len, tcp_hdrlen;
+	unsigned int i;
 	__be16 oldval;
 	u16 newmss;
 	u8 *opt;
 
 	/* This is a fragment, no TCP header is available */
 	if (par->fragoff != 0)
-		return XT_CONTINUE;
+		return 0;
 
 	if (!skb_make_writable(skb, skb->len))
 		return -1;
 
-	tcplen = skb->len - tcphoff;
+	len = skb->len - tcphoff;
+	if (len < (int)sizeof(struct tcphdr))
+		return -1;
+
 	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	tcp_hdrlen = tcph->doff * 4;
 
-	/* Header cannot be larger than the packet */
-	if (tcplen < tcph->doff*4)
+	if (len < tcp_hdrlen)
 		return -1;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
+		struct net *net = dev_net(par->in ? par->in : par->out);
+		unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
+
 		if (dst_mtu(skb_dst(skb)) <= minlen) {
 			net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
 					    dst_mtu(skb_dst(skb)));
@@ -87,9 +126,8 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 		newmss = info->mss;
 
 	opt = (u_int8_t *)tcph;
-	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
-		if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
-		    opt[i+1] == TCPOLEN_MSS) {
+	for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) {
+		if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) {
 			u_int16_t oldmss;
 
 			oldmss = (opt[i+2] << 8) | opt[i+3];
@@ -112,9 +150,10 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	}
 
 	/* There is data after the header so the option can't be added
-	   without moving it, and doing so may make the SYN packet
-	   itself too large. Accept the packet unmodified instead. */
-	if (tcplen > tcph->doff*4)
+	 * without moving it, and doing so may make the SYN packet
+	 * itself too large. Accept the packet unmodified instead.
+	 */
+	if (len > tcp_hdrlen)
 		return 0;
 
 	/*
@@ -143,10 +182,10 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 		newmss = min(newmss, (u16)1220);
 
 	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
-	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
+	memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr));
 
 	inet_proto_csum_replace2(&tcph->check, skb,
-				 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
+				 htons(len), htons(len + TCPOLEN_MSS), 1);
 	opt[0] = TCPOPT_MSS;
 	opt[1] = TCPOLEN_MSS;
 	opt[2] = (newmss & 0xff00) >> 8;
@@ -161,37 +200,6 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	return TCPOLEN_MSS;
 }
 
-static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
-				    unsigned int family)
-{
-	struct flowi fl;
-	const struct nf_afinfo *ai;
-	struct rtable *rt = NULL;
-	u_int32_t mtu     = ~0U;
-
-	if (family == PF_INET) {
-		struct flowi4 *fl4 = &fl.u.ip4;
-		memset(fl4, 0, sizeof(*fl4));
-		fl4->daddr = ip_hdr(skb)->saddr;
-	} else {
-		struct flowi6 *fl6 = &fl.u.ip6;
-
-		memset(fl6, 0, sizeof(*fl6));
-		fl6->daddr = ipv6_hdr(skb)->saddr;
-	}
-	rcu_read_lock();
-	ai = nf_get_afinfo(family);
-	if (ai != NULL)
-		ai->route(&init_net, (struct dst_entry **)&rt, &fl, false);
-	rcu_read_unlock();
-
-	if (rt != NULL) {
-		mtu = dst_mtu(&rt->dst);
-		dst_release(&rt->dst);
-	}
-	return mtu;
-}
-
 static unsigned int
 tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -200,7 +208,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	int ret;
 
 	ret = tcpmss_mangle_packet(skb, par,
-				   tcpmss_reverse_mtu(skb, PF_INET),
+				   PF_INET,
 				   iph->ihl * 4,
 				   sizeof(*iph) + sizeof(struct tcphdr));
 	if (ret < 0)
@@ -229,7 +237,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	if (tcphoff < 0)
 		return NF_DROP;
 	ret = tcpmss_mangle_packet(skb, par,
-				   tcpmss_reverse_mtu(skb, PF_INET6),
+				   PF_INET6,
 				   tcphoff,
 				   sizeof(*ipv6h) + sizeof(struct tcphdr));
 	if (ret < 0)
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index b68fa19..625fa1d 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -38,7 +38,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
 	struct tcphdr *tcph;
 	u_int16_t n, o;
 	u_int8_t *opt;
-	int len;
+	int len, tcp_hdrlen;
 
 	/* This is a fragment, no TCP header is available */
 	if (par->fragoff != 0)
@@ -52,7 +52,9 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
 		return NF_DROP;
 
 	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
-	if (tcph->doff * 4 > len)
+	tcp_hdrlen = tcph->doff * 4;
+
+	if (len < tcp_hdrlen)
 		return NF_DROP;
 
 	opt  = (u_int8_t *)tcph;
@@ -61,10 +63,10 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
 	 * Walk through all TCP options - if we find some option to remove,
 	 * set all octets to %TCPOPT_NOP and adjust checksum.
 	 */
-	for (i = sizeof(struct tcphdr); i < tcp_hdrlen(skb); i += optl) {
+	for (i = sizeof(struct tcphdr); i < tcp_hdrlen - 1; i += optl) {
 		optl = optlen(opt, i);
 
-		if (i + optl > tcp_hdrlen(skb))
+		if (i + optl > tcp_hdrlen)
 			break;
 
 		if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i]))
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index bd93e51..292934d 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -200,7 +200,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 static int tee_netdev_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct xt_tee_priv *priv;
 
 	priv = container_of(this, struct xt_tee_priv, notifier);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d7f1953..ef8a926 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -15,7 +15,9 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 #include <net/udp.h>
+#include <net/tcp.h>
 #include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
 #include <linux/inetdevice.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -26,13 +28,18 @@
 #define XT_TPROXY_HAVE_IPV6 1
 #include <net/if_inet6.h>
 #include <net/addrconf.h>
+#include <net/inet6_hashtables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #endif
 
-#include <net/netfilter/nf_tproxy_core.h>
 #include <linux/netfilter/xt_TPROXY.h>
 
+enum nf_tproxy_lookup_t {
+	 NFT_LOOKUP_LISTENER,
+	 NFT_LOOKUP_ESTABLISHED,
+};
+
 static bool tproxy_sk_is_transparent(struct sock *sk)
 {
 	if (sk->sk_state != TCP_TIME_WAIT) {
@@ -68,6 +75,157 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
 	return laddr ? laddr : daddr;
 }
 
+/*
+ * This is used when the user wants to intercept a connection matching
+ * an explicit iptables rule. In this case the sockets are assumed
+ * matching in preference order:
+ *
+ *   - match: if there's a fully established connection matching the
+ *     _packet_ tuple, it is returned, assuming the redirection
+ *     already took place and we process a packet belonging to an
+ *     established connection
+ *
+ *   - match: if there's a listening socket matching the redirection
+ *     (e.g. on-port & on-ip of the connection), it is returned,
+ *     regardless if it was bound to 0.0.0.0 or an explicit
+ *     address. The reasoning is that if there's an explicit rule, it
+ *     does not really matter if the listener is bound to an interface
+ *     or to 0. The user already stated that he wants redirection
+ *     (since he added the rule).
+ *
+ * Please note that there's an overlap between what a TPROXY target
+ * and a socket match will match. Normally if you have both rules the
+ * "socket" match will be the first one, effectively all packets
+ * belonging to established connections going through that one.
+ */
+static inline struct sock *
+nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+		      const __be32 saddr, const __be32 daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in,
+		      const enum nf_tproxy_lookup_t lookup_type)
+{
+	struct sock *sk;
+
+	switch (protocol) {
+	case IPPROTO_TCP:
+		switch (lookup_type) {
+		case NFT_LOOKUP_LISTENER:
+			sk = inet_lookup_listener(net, &tcp_hashinfo,
+						    saddr, sport,
+						    daddr, dport,
+						    in->ifindex);
+
+			/* NOTE: we return listeners even if bound to
+			 * 0.0.0.0, those are filtered out in
+			 * xt_socket, since xt_TPROXY needs 0 bound
+			 * listeners too
+			 */
+			break;
+		case NFT_LOOKUP_ESTABLISHED:
+			sk = inet_lookup_established(net, &tcp_hashinfo,
+						    saddr, sport, daddr, dport,
+						    in->ifindex);
+			break;
+		default:
+			BUG();
+		}
+		break;
+	case IPPROTO_UDP:
+		sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+				     in->ifindex);
+		if (sk) {
+			int connected = (sk->sk_state == TCP_ESTABLISHED);
+			int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
+
+			/* NOTE: we return listeners even if bound to
+			 * 0.0.0.0, those are filtered out in
+			 * xt_socket, since xt_TPROXY needs 0 bound
+			 * listeners too
+			 */
+			if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+			    (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
+				sock_put(sk);
+				sk = NULL;
+			}
+		}
+		break;
+	default:
+		WARN_ON(1);
+		sk = NULL;
+	}
+
+	pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
+		 protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
+
+	return sk;
+}
+
+#ifdef XT_TPROXY_HAVE_IPV6
+static inline struct sock *
+nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+		      const struct in6_addr *saddr, const struct in6_addr *daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in,
+		      const enum nf_tproxy_lookup_t lookup_type)
+{
+	struct sock *sk;
+
+	switch (protocol) {
+	case IPPROTO_TCP:
+		switch (lookup_type) {
+		case NFT_LOOKUP_LISTENER:
+			sk = inet6_lookup_listener(net, &tcp_hashinfo,
+						   saddr, sport,
+						   daddr, ntohs(dport),
+						   in->ifindex);
+
+			/* NOTE: we return listeners even if bound to
+			 * 0.0.0.0, those are filtered out in
+			 * xt_socket, since xt_TPROXY needs 0 bound
+			 * listeners too
+			 */
+			break;
+		case NFT_LOOKUP_ESTABLISHED:
+			sk = __inet6_lookup_established(net, &tcp_hashinfo,
+							saddr, sport, daddr, ntohs(dport),
+							in->ifindex);
+			break;
+		default:
+			BUG();
+		}
+		break;
+	case IPPROTO_UDP:
+		sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
+				     in->ifindex);
+		if (sk) {
+			int connected = (sk->sk_state == TCP_ESTABLISHED);
+			int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
+
+			/* NOTE: we return listeners even if bound to
+			 * 0.0.0.0, those are filtered out in
+			 * xt_socket, since xt_TPROXY needs 0 bound
+			 * listeners too
+			 */
+			if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+			    (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
+				sock_put(sk);
+				sk = NULL;
+			}
+		}
+		break;
+	default:
+		WARN_ON(1);
+		sk = NULL;
+	}
+
+	pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
+		 protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
+
+	return sk;
+}
+#endif
+
 /**
  * tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
  * @skb:	The skb being processed.
@@ -117,6 +275,15 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
 	return sk;
 }
 
+/* assign a socket to the skb -- consumes sk */
+static void
+nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_edemux;
+}
+
 static unsigned int
 tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
 	   u_int32_t mark_mask, u_int32_t mark_value)
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 68ff29f..fab6eea 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -202,7 +202,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 			return -EINVAL;
 		}
 		if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
-			pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n");
+			pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
 			return -EINVAL;
 		}
 		if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index e595e07..1e63461 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -26,16 +26,18 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	u_int64_t what = 0;	/* initialize to make gcc happy */
 	u_int64_t bytes = 0;
 	u_int64_t pkts = 0;
+	const struct nf_conn_acct *acct;
 	const struct nf_conn_counter *counters;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct)
 		return false;
 
-	counters = nf_conn_acct_find(ct);
-	if (!counters)
+	acct = nf_conn_acct_find(ct);
+	if (!acct)
 		return false;
 
+	counters = acct->counter;
 	switch (sinfo->what) {
 	case XT_CONNBYTES_PKTS:
 		switch (sinfo->direction) {
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index ed0db15..7720b03 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -18,7 +18,7 @@ static bool
 xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateest_match_info *info = par->matchinfo;
-	struct gnet_stats_rate_est *r;
+	struct gnet_stats_rate_est64 *r;
 	u_int32_t bps1, bps2, pps1, pps2;
 	bool ret = true;
 
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 31790e7..80c2e2d 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -81,17 +81,17 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
 	struct xt_set_info_match_v0 *info = par->matchinfo;
 	ip_set_id_t index;
 
-	index = ip_set_nfnl_get_byindex(info->match_set.index);
+	index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
 
 	if (index == IPSET_INVALID_ID) {
-		pr_warning("Cannot find set indentified by id %u to match\n",
+		pr_warning("Cannot find set identified by id %u to match\n",
 			   info->match_set.index);
 		return -ENOENT;
 	}
 	if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
 		pr_warning("Protocol error: set match dimension "
 			   "is over the limit!\n");
-		ip_set_nfnl_put(info->match_set.index);
+		ip_set_nfnl_put(par->net, info->match_set.index);
 		return -ERANGE;
 	}
 
@@ -106,9 +106,104 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par)
 {
 	struct xt_set_info_match_v0 *info = par->matchinfo;
 
-	ip_set_nfnl_put(info->match_set.index);
+	ip_set_nfnl_put(par->net, info->match_set.index);
 }
 
+/* Revision 1 match */
+
+static bool
+set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_set_info_match_v1 *info = par->matchinfo;
+	ADT_OPT(opt, par->family, info->match_set.dim,
+		info->match_set.flags, 0, UINT_MAX);
+
+	if (opt.flags & IPSET_RETURN_NOMATCH)
+		opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
+
+	return match_set(info->match_set.index, skb, par, &opt,
+			 info->match_set.flags & IPSET_INV_MATCH);
+}
+
+static int
+set_match_v1_checkentry(const struct xt_mtchk_param *par)
+{
+	struct xt_set_info_match_v1 *info = par->matchinfo;
+	ip_set_id_t index;
+
+	index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
+
+	if (index == IPSET_INVALID_ID) {
+		pr_warning("Cannot find set identified by id %u to match\n",
+			   info->match_set.index);
+		return -ENOENT;
+	}
+	if (info->match_set.dim > IPSET_DIM_MAX) {
+		pr_warning("Protocol error: set match dimension "
+			   "is over the limit!\n");
+		ip_set_nfnl_put(par->net, info->match_set.index);
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+static void
+set_match_v1_destroy(const struct xt_mtdtor_param *par)
+{
+	struct xt_set_info_match_v1 *info = par->matchinfo;
+
+	ip_set_nfnl_put(par->net, info->match_set.index);
+}
+
+/* Revision 3 match */
+
+static bool
+match_counter(u64 counter, const struct ip_set_counter_match *info)
+{
+	switch (info->op) {
+	case IPSET_COUNTER_NONE:
+		return true;
+	case IPSET_COUNTER_EQ:
+		return counter == info->value;
+	case IPSET_COUNTER_NE:
+		return counter != info->value;
+	case IPSET_COUNTER_LT:
+		return counter < info->value;
+	case IPSET_COUNTER_GT:
+		return counter > info->value;
+	}
+	return false;
+}
+
+static bool
+set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_set_info_match_v3 *info = par->matchinfo;
+	ADT_OPT(opt, par->family, info->match_set.dim,
+		info->match_set.flags, info->flags, UINT_MAX);
+	int ret;
+
+	if (info->packets.op != IPSET_COUNTER_NONE ||
+	    info->bytes.op != IPSET_COUNTER_NONE)
+		opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
+
+	ret = match_set(info->match_set.index, skb, par, &opt,
+			info->match_set.flags & IPSET_INV_MATCH);
+
+	if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
+		return ret;
+
+	if (!match_counter(opt.ext.packets, &info->packets))
+		return 0;
+	return match_counter(opt.ext.bytes, &info->bytes);
+}
+
+#define set_match_v3_checkentry	set_match_v1_checkentry
+#define set_match_v3_destroy	set_match_v1_destroy
+
+/* Revision 0 interface: backward compatible with netfilter/iptables */
+
 static unsigned int
 set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -133,7 +228,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 	ip_set_id_t index;
 
 	if (info->add_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->add_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find add_set index %u as target\n",
 				   info->add_set.index);
@@ -142,12 +237,12 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 	}
 
 	if (info->del_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->del_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
 			if (info->add_set.index != IPSET_INVALID_ID)
-				ip_set_nfnl_put(info->add_set.index);
+				ip_set_nfnl_put(par->net, info->add_set.index);
 			return -ENOENT;
 		}
 	}
@@ -156,9 +251,9 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
 		if (info->add_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->add_set.index);
+			ip_set_nfnl_put(par->net, info->add_set.index);
 		if (info->del_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->del_set.index);
+			ip_set_nfnl_put(par->net, info->del_set.index);
 		return -ERANGE;
 	}
 
@@ -175,57 +270,12 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par)
 	const struct xt_set_info_target_v0 *info = par->targinfo;
 
 	if (info->add_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->add_set.index);
+		ip_set_nfnl_put(par->net, info->add_set.index);
 	if (info->del_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->del_set.index);
-}
-
-/* Revision 1 match and target */
-
-static bool
-set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
-{
-	const struct xt_set_info_match_v1 *info = par->matchinfo;
-	ADT_OPT(opt, par->family, info->match_set.dim,
-		info->match_set.flags, 0, UINT_MAX);
-
-	if (opt.flags & IPSET_RETURN_NOMATCH)
-		opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
-
-	return match_set(info->match_set.index, skb, par, &opt,
-			 info->match_set.flags & IPSET_INV_MATCH);
-}
-
-static int
-set_match_v1_checkentry(const struct xt_mtchk_param *par)
-{
-	struct xt_set_info_match_v1 *info = par->matchinfo;
-	ip_set_id_t index;
-
-	index = ip_set_nfnl_get_byindex(info->match_set.index);
-
-	if (index == IPSET_INVALID_ID) {
-		pr_warning("Cannot find set indentified by id %u to match\n",
-			   info->match_set.index);
-		return -ENOENT;
-	}
-	if (info->match_set.dim > IPSET_DIM_MAX) {
-		pr_warning("Protocol error: set match dimension "
-			   "is over the limit!\n");
-		ip_set_nfnl_put(info->match_set.index);
-		return -ERANGE;
-	}
-
-	return 0;
+		ip_set_nfnl_put(par->net, info->del_set.index);
 }
 
-static void
-set_match_v1_destroy(const struct xt_mtdtor_param *par)
-{
-	struct xt_set_info_match_v1 *info = par->matchinfo;
-
-	ip_set_nfnl_put(info->match_set.index);
-}
+/* Revision 1 target */
 
 static unsigned int
 set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
@@ -251,7 +301,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
 	ip_set_id_t index;
 
 	if (info->add_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->add_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find add_set index %u as target\n",
 				   info->add_set.index);
@@ -260,12 +310,12 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
 	}
 
 	if (info->del_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->del_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
 			if (info->add_set.index != IPSET_INVALID_ID)
-				ip_set_nfnl_put(info->add_set.index);
+				ip_set_nfnl_put(par->net, info->add_set.index);
 			return -ENOENT;
 		}
 	}
@@ -274,9 +324,9 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
 		if (info->add_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->add_set.index);
+			ip_set_nfnl_put(par->net, info->add_set.index);
 		if (info->del_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->del_set.index);
+			ip_set_nfnl_put(par->net, info->del_set.index);
 		return -ERANGE;
 	}
 
@@ -289,9 +339,9 @@ set_target_v1_destroy(const struct xt_tgdtor_param *par)
 	const struct xt_set_info_target_v1 *info = par->targinfo;
 
 	if (info->add_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->add_set.index);
+		ip_set_nfnl_put(par->net, info->add_set.index);
 	if (info->del_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->del_set.index);
+		ip_set_nfnl_put(par->net, info->del_set.index);
 }
 
 /* Revision 2 target */
@@ -320,52 +370,6 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 #define set_target_v2_checkentry	set_target_v1_checkentry
 #define set_target_v2_destroy		set_target_v1_destroy
 
-/* Revision 3 match */
-
-static bool
-match_counter(u64 counter, const struct ip_set_counter_match *info)
-{
-	switch (info->op) {
-	case IPSET_COUNTER_NONE:
-		return true;
-	case IPSET_COUNTER_EQ:
-		return counter == info->value;
-	case IPSET_COUNTER_NE:
-		return counter != info->value;
-	case IPSET_COUNTER_LT:
-		return counter < info->value;
-	case IPSET_COUNTER_GT:
-		return counter > info->value;
-	}
-	return false;
-}
-
-static bool
-set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
-{
-	const struct xt_set_info_match_v3 *info = par->matchinfo;
-	ADT_OPT(opt, par->family, info->match_set.dim,
-		info->match_set.flags, info->flags, UINT_MAX);
-	int ret;
-
-	if (info->packets.op != IPSET_COUNTER_NONE ||
-	    info->bytes.op != IPSET_COUNTER_NONE)
-		opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
-
-	ret = match_set(info->match_set.index, skb, par, &opt,
-			info->match_set.flags & IPSET_INV_MATCH);
-
-	if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
-		return ret;
-
-	if (!match_counter(opt.ext.packets, &info->packets))
-		return 0;
-	return match_counter(opt.ext.bytes, &info->bytes);
-}
-
-#define set_match_v3_checkentry	set_match_v1_checkentry
-#define set_match_v3_destroy	set_match_v1_destroy
-
 static struct xt_match set_matches[] __read_mostly = {
 	{
 		.name		= "set",
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 63b2bdb..1ba6793 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -19,12 +19,12 @@
 #include <net/icmp.h>
 #include <net/sock.h>
 #include <net/inet_sock.h>
-#include <net/netfilter/nf_tproxy_core.h>
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 #define XT_SOCKET_HAVE_IPV6 1
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/inet6_hashtables.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #endif
 
@@ -35,15 +35,6 @@
 #include <net/netfilter/nf_conntrack.h>
 #endif
 
-static void
-xt_socket_put_sk(struct sock *sk)
-{
-	if (sk->sk_state == TCP_TIME_WAIT)
-		inet_twsk_put(inet_twsk(sk));
-	else
-		sock_put(sk);
-}
-
 static int
 extract_icmp4_fields(const struct sk_buff *skb,
 		    u8 *protocol,
@@ -101,13 +92,50 @@ extract_icmp4_fields(const struct sk_buff *skb,
 	return 0;
 }
 
+/* "socket" match based redirection (no specific rule)
+ * ===================================================
+ *
+ * There are connections with dynamic endpoints (e.g. FTP data
+ * connection) that the user is unable to add explicit rules
+ * for. These are taken care of by a generic "socket" rule. It is
+ * assumed that the proxy application is trusted to open such
+ * connections without explicit iptables rule (except of course the
+ * generic 'socket' rule). In this case the following sockets are
+ * matched in preference order:
+ *
+ *   - match: if there's a fully established connection matching the
+ *     _packet_ tuple
+ *
+ *   - match: if there's a non-zero bound listener (possibly with a
+ *     non-local address) We don't accept zero-bound listeners, since
+ *     then local services could intercept traffic going through the
+ *     box.
+ */
+static struct sock *
+xt_socket_get_sock_v4(struct net *net, const u8 protocol,
+		      const __be32 saddr, const __be32 daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in)
+{
+	switch (protocol) {
+	case IPPROTO_TCP:
+		return __inet_lookup(net, &tcp_hashinfo,
+				     saddr, sport, daddr, dport,
+				     in->ifindex);
+	case IPPROTO_UDP:
+		return udp4_lib_lookup(net, saddr, sport, daddr, dport,
+				       in->ifindex);
+	}
+	return NULL;
+}
+
 static bool
 socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	     const struct xt_socket_mtinfo1 *info)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr _hdr, *hp = NULL;
-	struct sock *sk;
+	struct sock *sk = skb->sk;
 	__be32 uninitialized_var(daddr), uninitialized_var(saddr);
 	__be16 uninitialized_var(dport), uninitialized_var(sport);
 	u8 uninitialized_var(protocol);
@@ -155,25 +183,31 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	}
 #endif
 
-	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
-				   saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
-	if (sk != NULL) {
+	if (!sk)
+		sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
+					   saddr, daddr, sport, dport,
+					   par->in);
+	if (sk) {
 		bool wildcard;
 		bool transparent = true;
 
-		/* Ignore sockets listening on INADDR_ANY */
-		wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+		/* Ignore sockets listening on INADDR_ANY,
+		 * unless XT_SOCKET_NOWILDCARD is set
+		 */
+		wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
+			    sk->sk_state != TCP_TIME_WAIT &&
 			    inet_sk(sk)->inet_rcv_saddr == 0);
 
 		/* Ignore non-transparent sockets,
 		   if XT_SOCKET_TRANSPARENT is used */
-		if (info && info->flags & XT_SOCKET_TRANSPARENT)
+		if (info->flags & XT_SOCKET_TRANSPARENT)
 			transparent = ((sk->sk_state != TCP_TIME_WAIT &&
 					inet_sk(sk)->transparent) ||
 				       (sk->sk_state == TCP_TIME_WAIT &&
 					inet_twsk(sk)->tw_transparent));
 
-		xt_socket_put_sk(sk);
+		if (sk != skb->sk)
+			sock_gen_put(sk);
 
 		if (wildcard || !transparent)
 			sk = NULL;
@@ -190,11 +224,15 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 static bool
 socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	return socket_match(skb, par, NULL);
+	static struct xt_socket_mtinfo1 xt_info_v0 = {
+		.flags = 0,
+	};
+
+	return socket_match(skb, par, &xt_info_v0);
 }
 
 static bool
-socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	return socket_match(skb, par, par->matchinfo);
 }
@@ -255,12 +293,31 @@ extract_icmp6_fields(const struct sk_buff *skb,
 	return 0;
 }
 
+static struct sock *
+xt_socket_get_sock_v6(struct net *net, const u8 protocol,
+		      const struct in6_addr *saddr, const struct in6_addr *daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in)
+{
+	switch (protocol) {
+	case IPPROTO_TCP:
+		return inet6_lookup(net, &tcp_hashinfo,
+				    saddr, sport, daddr, dport,
+				    in->ifindex);
+	case IPPROTO_UDP:
+		return udp6_lib_lookup(net, saddr, sport, daddr, dport,
+				       in->ifindex);
+	}
+
+	return NULL;
+}
+
 static bool
-socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct udphdr _hdr, *hp = NULL;
-	struct sock *sk;
+	struct sock *sk = skb->sk;
 	struct in6_addr *daddr = NULL, *saddr = NULL;
 	__be16 uninitialized_var(dport), uninitialized_var(sport);
 	int thoff = 0, uninitialized_var(tproto);
@@ -291,25 +348,31 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
 		return false;
 	}
 
-	sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
-				   saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
-	if (sk != NULL) {
+	if (!sk)
+		sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
+					   saddr, daddr, sport, dport,
+					   par->in);
+	if (sk) {
 		bool wildcard;
 		bool transparent = true;
 
-		/* Ignore sockets listening on INADDR_ANY */
-		wildcard = (sk->sk_state != TCP_TIME_WAIT &&
-			    ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
+		/* Ignore sockets listening on INADDR_ANY
+		 * unless XT_SOCKET_NOWILDCARD is set
+		 */
+		wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
+			    sk->sk_state != TCP_TIME_WAIT &&
+			    ipv6_addr_any(&sk->sk_v6_rcv_saddr));
 
 		/* Ignore non-transparent sockets,
 		   if XT_SOCKET_TRANSPARENT is used */
-		if (info && info->flags & XT_SOCKET_TRANSPARENT)
+		if (info->flags & XT_SOCKET_TRANSPARENT)
 			transparent = ((sk->sk_state != TCP_TIME_WAIT &&
 					inet_sk(sk)->transparent) ||
 				       (sk->sk_state == TCP_TIME_WAIT &&
 					inet_twsk(sk)->tw_transparent));
 
-		xt_socket_put_sk(sk);
+		if (sk != skb->sk)
+			sock_gen_put(sk);
 
 		if (wildcard || !transparent)
 			sk = NULL;
@@ -325,6 +388,28 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
 }
 #endif
 
+static int socket_mt_v1_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+
+	if (info->flags & ~XT_SOCKET_FLAGS_V1) {
+		pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int socket_mt_v2_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo;
+
+	if (info->flags & ~XT_SOCKET_FLAGS_V2) {
+		pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static struct xt_match socket_mt_reg[] __read_mostly = {
 	{
 		.name		= "socket",
@@ -339,7 +424,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.name		= "socket",
 		.revision	= 1,
 		.family		= NFPROTO_IPV4,
-		.match		= socket_mt4_v1,
+		.match		= socket_mt4_v1_v2,
+		.checkentry	= socket_mt_v1_check,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
@@ -350,7 +436,32 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.name		= "socket",
 		.revision	= 1,
 		.family		= NFPROTO_IPV6,
-		.match		= socket_mt6_v1,
+		.match		= socket_mt6_v1_v2,
+		.checkentry	= socket_mt_v1_check,
+		.matchsize	= sizeof(struct xt_socket_mtinfo1),
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+		.me		= THIS_MODULE,
+	},
+#endif
+	{
+		.name		= "socket",
+		.revision	= 2,
+		.family		= NFPROTO_IPV4,
+		.match		= socket_mt4_v1_v2,
+		.checkentry	= socket_mt_v2_check,
+		.matchsize	= sizeof(struct xt_socket_mtinfo1),
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+		.me		= THIS_MODULE,
+	},
+#ifdef XT_SOCKET_HAVE_IPV6
+	{
+		.name		= "socket",
+		.revision	= 2,
+		.family		= NFPROTO_IPV6,
+		.match		= socket_mt6_v1_v2,
+		.checkentry	= socket_mt_v2_check,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
author	Scott Wood <scottwood@freescale.com>	2013-12-14 01:15:24 (GMT)
committer	Scott Wood <scottwood@freescale.com>	2013-12-14 01:15:24 (GMT)
commit	b7c81aa3ab2ac2c140e278b6d0e9a0b95112cf0b (patch)
tree	87828aaf5f82c7042bfc0307bc4ac499e10f93fb /net/netfilter
parent	22c782a4b14773fab7eab3c1db54ad7ad077e9b8 (diff)
parent	78fd82238d0e5716578c326404184a27ba67fd6e (diff)
download	linux-fsl-qoriq-b7c81aa3ab2ac2c140e278b6d0e9a0b95112cf0b.tar.xz