From ef222013fc8c1a2fcba5c7ab169be8ffcb778ec4 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 11 Jul 2007 06:42:04 +0200
Subject: [Bluetooth] Add hci_recv_fragment() helper function

Most drivers must handle fragmented HCI data packets and events. This
patch adds a generic function for their reassembly to the Bluetooth
core layer, which reduces the complexity of the drivers.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index c0fc396..7c78744 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -109,6 +109,7 @@ struct hci_dev {
 	struct sk_buff_head	cmd_q;
 
 	struct sk_buff		*sent_cmd;
+	struct sk_buff		*reassembly[3];
 
 	struct semaphore	req_lock;
 	wait_queue_head_t	req_wait_q;
@@ -437,6 +438,8 @@ static inline int hci_recv_frame(struct sk_buff *skb)
 	return 0;
 }
 
+int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count);
+
 int hci_register_sysfs(struct hci_dev *hdev);
 void hci_unregister_sysfs(struct hci_dev *hdev);
 void hci_conn_add_sysfs(struct hci_conn *conn);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index aa4b56a..9c71cff 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -826,7 +826,7 @@ EXPORT_SYMBOL(hci_free_dev);
 int hci_register_dev(struct hci_dev *hdev)
 {
 	struct list_head *head = &hci_dev_list, *p;
-	int id = 0;
+	int i, id = 0;
 
 	BT_DBG("%p name %s type %d owner %p", hdev, hdev->name, hdev->type, hdev->owner);
 
@@ -865,6 +865,9 @@ int hci_register_dev(struct hci_dev *hdev)
 	skb_queue_head_init(&hdev->cmd_q);
 	skb_queue_head_init(&hdev->raw_q);
 
+	for (i = 0; i < 3; i++)
+		hdev->reassembly[i] = NULL;
+
 	init_waitqueue_head(&hdev->req_wait_q);
 	init_MUTEX(&hdev->req_lock);
 
@@ -889,6 +892,8 @@ EXPORT_SYMBOL(hci_register_dev);
 /* Unregister HCI device */
 int hci_unregister_dev(struct hci_dev *hdev)
 {
+	int i;
+
 	BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
 
 	hci_unregister_sysfs(hdev);
@@ -899,9 +904,13 @@ int hci_unregister_dev(struct hci_dev *hdev)
 
 	hci_dev_do_close(hdev);
 
+	for (i = 0; i < 3; i++)
+		kfree_skb(hdev->reassembly[i]);
+
 	hci_notify(hdev, HCI_DEV_UNREG);
 
 	__hci_dev_put(hdev);
+
 	return 0;
 }
 EXPORT_SYMBOL(hci_unregister_dev);
@@ -922,6 +931,90 @@ int hci_resume_dev(struct hci_dev *hdev)
 }
 EXPORT_SYMBOL(hci_resume_dev);
 
+/* Receive a packet fragment and reassemble it into a full frame, per type */
+#define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 2])
+
+int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
+{
+	if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT)
+		return -EILSEQ;
+
+	while (count) {
+		struct sk_buff *skb = __reassembly(hdev, type);
+		struct { int expect; } *scb;
+		int len = 0;
+
+		if (!skb) {
+			/* Start of the frame: full length is read from the header */
+
+			switch (type) {
+			case HCI_EVENT_PKT:
+				if (count >= HCI_EVENT_HDR_SIZE) {
+					struct hci_event_hdr *h = data;
+					len = HCI_EVENT_HDR_SIZE + h->plen;
+				} else
+					return -EILSEQ;
+				break;
+
+			case HCI_ACLDATA_PKT:
+				if (count >= HCI_ACL_HDR_SIZE) {
+					struct hci_acl_hdr *h = data;
+					len = HCI_ACL_HDR_SIZE + __le16_to_cpu(h->dlen);
+				} else
+					return -EILSEQ;
+				break;
+
+			case HCI_SCODATA_PKT:
+				if (count >= HCI_SCO_HDR_SIZE) {
+					struct hci_sco_hdr *h = data;
+					len = HCI_SCO_HDR_SIZE + h->dlen;
+				} else
+					return -EILSEQ;
+				break;
+			}
+
+			skb = bt_skb_alloc(len, GFP_ATOMIC);
+			if (!skb) {
+				BT_ERR("%s no memory for packet", hdev->name);
+				return -ENOMEM;
+			}
+
+			skb->dev = (void *) hdev;
+			bt_cb(skb)->pkt_type = type;
+	
+			__reassembly(hdev, type) = skb;
+
+			scb = (void *) skb->cb;
+			scb->expect = len;
+		} else {
+			/* Continuation: pick up the byte count still expected */
+
+			scb = (void *) skb->cb;
+			len = scb->expect;
+		}
+
+		len = min(len, count);
+
+		memcpy(skb_put(skb, len), data, len);
+
+		scb->expect -= len;
+
+		if (scb->expect == 0) {
+			/* Complete frame: clear the slot, hand skb to hci_recv_frame() */
+
+			__reassembly(hdev, type) = NULL;
+
+			bt_cb(skb)->pkt_type = type;
+			hci_recv_frame(skb);
+		}
+
+		count -= len; data += len;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(hci_recv_fragment);
+
 /* ---- Interface to upper protocols ---- */
 
 /* Register/Unregister protocols.
@@ -1029,7 +1122,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
 
 	skb = bt_skb_alloc(len, GFP_ATOMIC);
 	if (!skb) {
-		BT_ERR("%s Can't allocate memory for HCI command", hdev->name);
+		BT_ERR("%s no memory for command", hdev->name);
 		return -ENOMEM;
 	}
 
-- 
cgit v0.10.2


From babf4d42d0a41ff58463b0a0515683cdce66f541 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 11 Jul 2007 06:42:35 +0200
Subject: [Bluetooth] Use hci_recv_fragment() within HCI USB driver

This patch modifies the HCI USB driver to use the new helper function
for reassembling HCI data packets and events.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c
index 7e04dd6..59b0548 100644
--- a/drivers/bluetooth/hci_usb.c
+++ b/drivers/bluetooth/hci_usb.c
@@ -199,7 +199,6 @@ static void hci_usb_tx_complete(struct urb *urb);
 #define __pending_q(husb, type)   (&husb->pending_q[type-1])
 #define __completed_q(husb, type) (&husb->completed_q[type-1])
 #define __transmit_q(husb, type)  (&husb->transmit_q[type-1])
-#define __reassembly(husb, type)  (husb->reassembly[type-1])
 
 static inline struct _urb *__get_completed(struct hci_usb *husb, int type)
 {
@@ -429,12 +428,6 @@ static void hci_usb_unlink_urbs(struct hci_usb *husb)
 			kfree(urb->transfer_buffer);
 			_urb_free(_urb);
 		}
-
-		/* Release reassembly buffers */
-		if (husb->reassembly[i]) {
-			kfree_skb(husb->reassembly[i]);
-			husb->reassembly[i] = NULL;
-		}
 	}
 }
 
@@ -671,83 +664,6 @@ static int hci_usb_send_frame(struct sk_buff *skb)
 	return 0;
 }
 
-static inline int __recv_frame(struct hci_usb *husb, int type, void *data, int count)
-{
-	BT_DBG("%s type %d data %p count %d", husb->hdev->name, type, data, count);
-
-	husb->hdev->stat.byte_rx += count;
-
-	while (count) {
-		struct sk_buff *skb = __reassembly(husb, type);
-		struct { int expect; } *scb;
-		int len = 0;
-	
-		if (!skb) {
-			/* Start of the frame */
-
-			switch (type) {
-			case HCI_EVENT_PKT:
-				if (count >= HCI_EVENT_HDR_SIZE) {
-					struct hci_event_hdr *h = data;
-					len = HCI_EVENT_HDR_SIZE + h->plen;
-				} else
-					return -EILSEQ;
-				break;
-
-			case HCI_ACLDATA_PKT:
-				if (count >= HCI_ACL_HDR_SIZE) {
-					struct hci_acl_hdr *h = data;
-					len = HCI_ACL_HDR_SIZE + __le16_to_cpu(h->dlen);
-				} else
-					return -EILSEQ;
-				break;
-#ifdef CONFIG_BT_HCIUSB_SCO
-			case HCI_SCODATA_PKT:
-				if (count >= HCI_SCO_HDR_SIZE) {
-					struct hci_sco_hdr *h = data;
-					len = HCI_SCO_HDR_SIZE + h->dlen;
-				} else
-					return -EILSEQ;
-				break;
-#endif
-			}
-			BT_DBG("new packet len %d", len);
-
-			skb = bt_skb_alloc(len, GFP_ATOMIC);
-			if (!skb) {
-				BT_ERR("%s no memory for the packet", husb->hdev->name);
-				return -ENOMEM;
-			}
-			skb->dev = (void *) husb->hdev;
-			bt_cb(skb)->pkt_type = type;
-	
-			__reassembly(husb, type) = skb;
-
-			scb = (void *) skb->cb;
-			scb->expect = len;
-		} else {
-			/* Continuation */
-			scb = (void *) skb->cb;
-			len = scb->expect;
-		}
-
-		len = min(len, count);
-		
-		memcpy(skb_put(skb, len), data, len);
-
-		scb->expect -= len;
-		if (!scb->expect) {
-			/* Complete frame */
-			__reassembly(husb, type) = NULL;
-			bt_cb(skb)->pkt_type = type;
-			hci_recv_frame(skb);
-		}
-
-		count -= len; data += len;
-	}
-	return 0;
-}
-
 static void hci_usb_rx_complete(struct urb *urb)
 {
 	struct _urb *_urb = container_of(urb, struct _urb, urb);
@@ -776,7 +692,7 @@ static void hci_usb_rx_complete(struct urb *urb)
 					urb->iso_frame_desc[i].actual_length);
 	
 			if (!urb->iso_frame_desc[i].status)
-				__recv_frame(husb, _urb->type, 
+				hci_recv_fragment(husb->hdev, _urb->type, 
 					urb->transfer_buffer + urb->iso_frame_desc[i].offset,
 					urb->iso_frame_desc[i].actual_length);
 		}
@@ -784,7 +700,7 @@ static void hci_usb_rx_complete(struct urb *urb)
 		;
 #endif
 	} else {
-		err = __recv_frame(husb, _urb->type, urb->transfer_buffer, count);
+		err = hci_recv_fragment(husb->hdev, _urb->type, urb->transfer_buffer, count);
 		if (err < 0) { 
 			BT_ERR("%s corrupted packet: type %d count %d",
 					husb->hdev->name, _urb->type, count);
diff --git a/drivers/bluetooth/hci_usb.h b/drivers/bluetooth/hci_usb.h
index 963fc55..56cd3a9 100644
--- a/drivers/bluetooth/hci_usb.h
+++ b/drivers/bluetooth/hci_usb.h
@@ -102,9 +102,9 @@ struct hci_usb {
 	struct hci_dev		*hdev;
 
 	unsigned long		state;
-	
+
 	struct usb_device	*udev;
-	
+
 	struct usb_host_endpoint	*bulk_in_ep;
 	struct usb_host_endpoint	*bulk_out_ep;
 	struct usb_host_endpoint	*intr_in_ep;
@@ -116,7 +116,6 @@ struct hci_usb {
 	__u8			ctrl_req;
 
 	struct sk_buff_head	transmit_q[4];
-	struct sk_buff		*reassembly[4];		/* Reassembly buffers */
 
 	rwlock_t		completion_lock;
 
-- 
cgit v0.10.2


From 924f0e4a06419fa1996425d1d2512030e43acbba Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Wed, 11 Jul 2007 09:07:07 +0200
Subject: [Bluetooth] Remove the redundant non-seekable llseek method

Remove the llseek method given that the open method already calls
nonseekable_open().

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index b71a5cc..0638730 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -180,11 +180,6 @@ static inline ssize_t vhci_put_user(struct vhci_data *data,
 	return total;
 }
 
-static loff_t vhci_llseek(struct file *file, loff_t offset, int origin)
-{
-	return -ESPIPE;
-}
-
 static ssize_t vhci_read(struct file *file,
 				char __user *buf, size_t count, loff_t *pos)
 {
@@ -334,7 +329,6 @@ static int vhci_fasync(int fd, struct file *file, int on)
 
 static const struct file_operations vhci_fops = {
 	.owner		= THIS_MODULE,
-	.llseek		= vhci_llseek,
 	.read		= vhci_read,
 	.write		= vhci_write,
 	.poll		= vhci_poll,
-- 
cgit v0.10.2


From 84950cf0ba02fd6a5defe2511bc41f9aa2237632 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Wed, 11 Jul 2007 09:18:15 +0200
Subject: [Bluetooth] Hangup TTY before releasing rfcomm_dev

The core problem is that the RFCOMM socket layer ioctl can release
the rfcomm_dev struct while the RFCOMM TTY layer is still actively
using it. Calling tty_vhangup() is needed for a synchronous hangup
before rfcomm_dev is freed.

Addresses the oops at http://bugzilla.kernel.org/show_bug.cgi?id=7509

Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index b2b1cce..ba469b0 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -383,6 +383,10 @@ static int rfcomm_release_dev(void __user *arg)
 	if (req.flags & (1 << RFCOMM_HANGUP_NOW))
 		rfcomm_dlc_close(dev->dlc, 0);
 
+	/* Shut down TTY synchronously before freeing rfcomm_dev */
+	if (dev->tty)
+		tty_vhangup(dev->tty);
+
 	rfcomm_dev_del(dev);
 	rfcomm_dev_put(dev);
 	return 0;
-- 
cgit v0.10.2


From e06e7c615877026544ad7f8b309d1a3706410383 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sun, 10 Jun 2007 17:22:39 -0700
Subject: [IPV4]: The scheduled removal of multipath cached routing support.

With help from Chris Wedgwood.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 3a159da..484250d 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -262,25 +262,6 @@ Who:	Richard Purdie <rpurdie@rpsys.net>
 
 ---------------------------
 
-What:	Multipath cached routing support in ipv4
-When:	in 2.6.23
-Why:	Code was merged, then submitter immediately disappeared leaving
-	us with no maintainer and lots of bugs.  The code should not have
-	been merged in the first place, and many aspects of it's
-	implementation are blocking more critical core networking
-	development.  It's marked EXPERIMENTAL and no distribution
-	enables it because it cause obscure crashes due to unfixable bugs
-	(interfaces don't return errors so memory allocation can't be
-	handled, calling contexts of these interfaces make handling
-	errors impossible too because they get called after we've
-	totally commited to creating a route object, for example).
-	This problem has existed for years and no forward progress
-	has ever been made, and nobody steps up to try and salvage
-	this code, so we're going to finally just get rid of it.
-Who:	David S. Miller <davem@davemloft.net>
-
----------------------------
-
 What:	read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
 When:	December 2007
 Why:	These functions are a leftover from 2.4 times. They have several
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index afae306..d944516 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -91,7 +91,6 @@ header-y += in6.h
 header-y += in_route.h
 header-y += ioctl.h
 header-y += ipmi_msgdefs.h
-header-y += ip_mp_alg.h
 header-y += ipsec.h
 header-y += ipx.h
 header-y += irda.h
diff --git a/include/linux/ip_mp_alg.h b/include/linux/ip_mp_alg.h
deleted file mode 100644
index e234e20..0000000
--- a/include/linux/ip_mp_alg.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values.
- *
- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
- */
-
-#ifndef _LINUX_IP_MP_ALG_H
-#define _LINUX_IP_MP_ALG_H
-
-enum ip_mp_alg {
-	IP_MP_ALG_NONE,
-	IP_MP_ALG_RR,
-	IP_MP_ALG_DRR,
-	IP_MP_ALG_RANDOM,
-	IP_MP_ALG_WRANDOM,
-	__IP_MP_ALG_MAX
-};
-
-#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1)
-
-#endif /* _LINUX_IP_MP_ALG_H */
-
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1fae30a..6127858 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -261,7 +261,7 @@ enum rtattr_type_t
 	RTA_FLOW,
 	RTA_CACHEINFO,
 	RTA_SESSION,
-	RTA_MP_ALGO,
+	RTA_MP_ALGO, /* no longer used */
 	RTA_TABLE,
 	__RTA_MAX
 };
diff --git a/include/net/dst.h b/include/net/dst.h
index 82270f9..e9ff4a4 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -47,7 +47,6 @@ struct dst_entry
 #define DST_NOXFRM		2
 #define DST_NOPOLICY		4
 #define DST_NOHASH		8
-#define DST_BALANCED            0x10
 	unsigned long		expires;
 
 	unsigned short		header_len;	/* more space at head required */
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 69252cb..8cadc77 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -39,7 +39,6 @@ struct fib_config {
 	int			fc_mx_len;
 	int			fc_mp_len;
 	u32			fc_flow;
-	u32			fc_mp_alg;
 	u32			fc_nlflags;
 	struct nl_info		fc_nlinfo;
  };
@@ -86,9 +85,6 @@ struct fib_info {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			fib_power;
 #endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	u32			fib_mp_alg;
-#endif
 	struct fib_nh		fib_nh[0];
 #define fib_dev		fib_nh[0].nh_dev
 };
@@ -103,10 +99,6 @@ struct fib_result {
 	unsigned char	nh_sel;
 	unsigned char	type;
 	unsigned char	scope;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	__be32          network;
-	__be32          netmask;
-#endif
 	struct fib_info *fi;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_rule	*r;
@@ -145,14 +137,6 @@ struct fib_result_nl {
 #define FIB_RES_DEV(res)		(FIB_RES_NH(res).nh_dev)
 #define FIB_RES_OIF(res)		(FIB_RES_NH(res).nh_oif)
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-#define FIB_RES_NETWORK(res)		((res).network)
-#define FIB_RES_NETMASK(res)	        ((res).netmask)
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-#define FIB_RES_NETWORK(res)		(0)
-#define FIB_RES_NETMASK(res)	        (0)
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
-
 struct fib_table {
 	struct hlist_node tb_hlist;
 	u32		tb_id;
diff --git a/include/net/ip_mp_alg.h b/include/net/ip_mp_alg.h
deleted file mode 100644
index 25b5657..0000000
--- a/include/net/ip_mp_alg.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* ip_mp_alg.h: IPV4 multipath algorithm support.
- *
- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
- */
-
-#ifndef _NET_IP_MP_ALG_H
-#define _NET_IP_MP_ALG_H
-
-#include <linux/ip_mp_alg.h>
-#include <net/flow.h>
-#include <net/route.h>
-
-struct fib_nh;
-
-struct ip_mp_alg_ops {
-	void	(*mp_alg_select_route)(const struct flowi *flp,
-				       struct rtable *rth, struct rtable **rp);
-	void	(*mp_alg_flush)(void);
-	void	(*mp_alg_set_nhinfo)(__be32 network, __be32 netmask,
-				     unsigned char prefixlen,
-				     const struct fib_nh *nh);
-	void	(*mp_alg_remove)(struct rtable *rth);
-};
-
-extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg);
-extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg);
-
-extern struct ip_mp_alg_ops *ip_mp_alg_table[];
-
-static inline int multipath_select_route(const struct flowi *flp,
-					 struct rtable *rth,
-					 struct rtable **rp)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
-
-	/* mp_alg_select_route _MUST_ be implemented */
-	if (ops && (rth->u.dst.flags & DST_BALANCED)) {
-		ops->mp_alg_select_route(flp, rth, rp);
-		return 1;
-	}
-#endif
-	return 0;
-}
-
-static inline void multipath_flush(void)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	int i;
-
-	for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) {
-		struct ip_mp_alg_ops *ops = ip_mp_alg_table[i];
-
-		if (ops && ops->mp_alg_flush)
-			ops->mp_alg_flush();
-	}
-#endif
-}
-
-static inline void multipath_set_nhinfo(struct rtable *rth,
-					__be32 network, __be32 netmask,
-					unsigned char prefixlen,
-					const struct fib_nh *nh)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
-
-	if (ops && ops->mp_alg_set_nhinfo)
-		ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh);
-#endif
-}
-
-static inline void multipath_remove(struct rtable *rth)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
-
-	if (ops && ops->mp_alg_remove &&
-	    (rth->u.dst.flags & DST_BALANCED))
-		ops->mp_alg_remove(rth);
-#endif
-}
-
-static inline int multipath_comparekeys(const struct flowi *flp1,
-					const struct flowi *flp2)
-{
-	return flp1->fl4_dst == flp2->fl4_dst &&
-		flp1->fl4_src == flp2->fl4_src &&
-		flp1->oif == flp2->oif &&
-		flp1->mark == flp2->mark &&
-		!((flp1->fl4_tos ^ flp2->fl4_tos) &
-		  (IPTOS_RT_MASK | RTO_ONLINK));
-}
-
-#endif /* _NET_IP_MP_ALG_H */
diff --git a/include/net/route.h b/include/net/route.h
index 749e4df..188b893 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -62,7 +62,6 @@ struct rtable
 	
 	unsigned		rt_flags;
 	__u16			rt_type;
-	__u16			rt_multipath_alg;
 
 	__be32			rt_dst;	/* Path destination	*/
 	__be32			rt_src;	/* Path source		*/
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 010fbb2..fb79097 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -116,48 +116,6 @@ config IP_ROUTE_MULTIPATH
 	  equal "cost" and chooses one of them in a non-deterministic fashion
 	  if a matching packet arrives.
 
-config IP_ROUTE_MULTIPATH_CACHED
-	bool "IP: equal cost multipath with caching support (EXPERIMENTAL)"
-	depends on IP_ROUTE_MULTIPATH
-	help
-	  Normally, equal cost multipath routing is not supported by the
-	  routing cache. If you say Y here, alternative routes are cached
-	  and on cache lookup a route is chosen in a configurable fashion.
-
-	  If unsure, say N.
-
-config IP_ROUTE_MULTIPATH_RR
-	tristate "MULTIPATH: round robin algorithm"
-	depends on IP_ROUTE_MULTIPATH_CACHED
-	help
-	  Multipath routes are chosen according to Round Robin
-
-config IP_ROUTE_MULTIPATH_RANDOM
-	tristate "MULTIPATH: random algorithm"
-	depends on IP_ROUTE_MULTIPATH_CACHED
-	help
-	  Multipath routes are chosen in a random fashion. Actually,
-	  there is no weight for a route. The advantage of this policy
-	  is that it is implemented stateless and therefore introduces only
-	  a very small delay.
-
-config IP_ROUTE_MULTIPATH_WRANDOM
-	tristate "MULTIPATH: weighted random algorithm"
-	depends on IP_ROUTE_MULTIPATH_CACHED
-	help
-	  Multipath routes are chosen in a weighted random fashion. 
-	  The per route weights are the weights visible via ip route 2. As the
-	  corresponding state management introduces some overhead routing delay
-	  is increased.
-
-config IP_ROUTE_MULTIPATH_DRR
-	tristate "MULTIPATH: interface round robin algorithm"
-	depends on IP_ROUTE_MULTIPATH_CACHED
-	help
-	  Connections are distributed in a round robin fashion over the
-	  available interfaces. This policy makes sense if the connections 
-	  should be primarily distributed on interfaces and not on routes. 
-
 config IP_ROUTE_VERBOSE
 	bool "IP: verbose route monitoring"
 	depends on IP_ADVANCED_ROUTER
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4ff6c15..fbf1674 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -29,14 +29,9 @@ obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
 obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
 obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
 obj-$(CONFIG_IP_PNP) += ipconfig.o
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
 obj-$(CONFIG_NETFILTER)	+= netfilter.o netfilter/
 obj-$(CONFIG_IP_VS) += ipvs/
 obj-$(CONFIG_INET_DIAG) += inet_diag.o 
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 311d633..2eb909b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -453,7 +453,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
 	[RTA_FLOW]		= { .type = NLA_U32 },
-	[RTA_MP_ALGO]		= { .type = NLA_U32 },
 };
 
 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -515,9 +514,6 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 		case RTA_FLOW:
 			cfg->fc_flow = nla_get_u32(attr);
 			break;
-		case RTA_MP_ALGO:
-			cfg->fc_mp_alg = nla_get_u32(attr);
-			break;
 		case RTA_TABLE:
 			cfg->fc_table = nla_get_u32(attr);
 			break;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index bb94550..c434119 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -42,7 +42,6 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
-#include <net/ip_mp_alg.h>
 #include <net/netlink.h>
 #include <net/nexthop.h>
 
@@ -697,13 +696,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 			goto err_inval;
 	}
 #endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (cfg->fc_mp_alg) {
-		if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
-		    cfg->fc_mp_alg > IP_MP_ALG_MAX)
-			goto err_inval;
-	}
-#endif
 
 	err = -ENOBUFS;
 	if (fib_info_cnt >= fib_hash_size) {
@@ -791,10 +783,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 #endif
 	}
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	fi->fib_mp_alg = cfg->fc_mp_alg;
-#endif
-
 	if (fib_props[cfg->fc_type].error) {
 		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
 			goto err_inval;
@@ -940,10 +928,6 @@ out_fill_res:
 	res->type = fa->fa_type;
 	res->scope = fa->fa_scope;
 	res->fi = fa->fa_info;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	res->netmask = mask;
-	res->network = zone & inet_make_mask(prefixlen);
-#endif
 	atomic_inc(&res->fi->fib_clntref);
 	return 0;
 }
diff --git a/net/ipv4/multipath.c b/net/ipv4/multipath.c
deleted file mode 100644
index 4e9ca7c..0000000
--- a/net/ipv4/multipath.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* multipath.c: IPV4 multipath algorithm support.
- *
- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-
-#include <net/ip_mp_alg.h>
-
-static DEFINE_SPINLOCK(alg_table_lock);
-struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1];
-
-int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
-{
-	struct ip_mp_alg_ops **slot;
-	int err;
-
-	if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX ||
-	    !ops->mp_alg_select_route)
-		return -EINVAL;
-
-	spin_lock(&alg_table_lock);
-	slot = &ip_mp_alg_table[n];
-	if (*slot != NULL) {
-		err = -EBUSY;
-	} else {
-		*slot = ops;
-		err = 0;
-	}
-	spin_unlock(&alg_table_lock);
-
-	return err;
-}
-EXPORT_SYMBOL(multipath_alg_register);
-
-void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
-{
-	struct ip_mp_alg_ops **slot;
-
-	if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX)
-		return;
-
-	spin_lock(&alg_table_lock);
-	slot = &ip_mp_alg_table[n];
-	if (*slot == ops)
-		*slot = NULL;
-	spin_unlock(&alg_table_lock);
-
-	synchronize_net();
-}
-EXPORT_SYMBOL(multipath_alg_unregister);
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
deleted file mode 100644
index b03c5ca..0000000
--- a/net/ipv4/multipath_drr.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- *              Device round robin policy for multipath.
- *
- *
- * Version:	$Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $
- *
- * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/mroute.h>
-#include <linux/init.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/raw.h>
-#include <linux/notifier.h>
-#include <linux/if_arp.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/ipip.h>
-#include <net/checksum.h>
-#include <net/ip_mp_alg.h>
-
-struct multipath_device {
-	int		ifi; /* interface index of device */
-	atomic_t	usecount;
-	int 		allocated;
-};
-
-#define MULTIPATH_MAX_DEVICECANDIDATES 10
-
-static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
-static DEFINE_SPINLOCK(state_lock);
-
-static int inline __multipath_findslot(void)
-{
-	int i;
-
-	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
-		if (state[i].allocated == 0)
-			return i;
-	}
-	return -1;
-}
-
-static int inline __multipath_finddev(int ifindex)
-{
-	int i;
-
-	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
-		if (state[i].allocated != 0 &&
-		    state[i].ifi == ifindex)
-			return i;
-	}
-	return -1;
-}
-
-static int drr_dev_event(struct notifier_block *this,
-			 unsigned long event, void *ptr)
-{
-	struct net_device *dev = ptr;
-	int devidx;
-
-	switch (event) {
-	case NETDEV_UNREGISTER:
-	case NETDEV_DOWN:
-		spin_lock_bh(&state_lock);
-
-		devidx = __multipath_finddev(dev->ifindex);
-		if (devidx != -1) {
-			state[devidx].allocated = 0;
-			state[devidx].ifi = 0;
-			atomic_set(&state[devidx].usecount, 0);
-		}
-
-		spin_unlock_bh(&state_lock);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block drr_dev_notifier = {
-	.notifier_call	= drr_dev_event,
-};
-
-
-static void drr_safe_inc(atomic_t *usecount)
-{
-	int n;
-
-	atomic_inc(usecount);
-
-	n = atomic_read(usecount);
-	if (n <= 0) {
-		int i;
-
-		spin_lock_bh(&state_lock);
-
-		for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++)
-			atomic_set(&state[i].usecount, 0);
-
-		spin_unlock_bh(&state_lock);
-	}
-}
-
-static void drr_select_route(const struct flowi *flp,
-			     struct rtable *first, struct rtable **rp)
-{
-	struct rtable *nh, *result, *cur_min;
-	int min_usecount = -1;
-	int devidx = -1;
-	int cur_min_devidx = -1;
-
-	/* 1. make sure all alt. nexthops have the same GC related data */
-	/* 2. determine the new candidate to be returned */
-	result = NULL;
-	cur_min = NULL;
-	for (nh = rcu_dereference(first); nh;
-	     nh = rcu_dereference(nh->u.dst.rt_next)) {
-		if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
-		    multipath_comparekeys(&nh->fl, flp)) {
-			int nh_ifidx = nh->u.dst.dev->ifindex;
-
-			nh->u.dst.lastuse = jiffies;
-			nh->u.dst.__use++;
-			if (result != NULL)
-				continue;
-
-			/* search for the output interface */
-
-			/* this is not SMP safe, only add/remove are
-			 * SMP safe as wrong usecount updates have no big
-			 * impact
-			 */
-			devidx = __multipath_finddev(nh_ifidx);
-			if (devidx == -1) {
-				/* add the interface to the array
-				 * SMP safe
-				 */
-				spin_lock_bh(&state_lock);
-
-				/* due to SMP: search again */
-				devidx = __multipath_finddev(nh_ifidx);
-				if (devidx == -1) {
-					/* add entry for device */
-					devidx = __multipath_findslot();
-					if (devidx == -1) {
-						/* unlikely but possible */
-						continue;
-					}
-
-					state[devidx].allocated = 1;
-					state[devidx].ifi = nh_ifidx;
-					atomic_set(&state[devidx].usecount, 0);
-					min_usecount = 0;
-				}
-
-				spin_unlock_bh(&state_lock);
-			}
-
-			if (min_usecount == 0) {
-				/* if the device has not been used it is
-				 * the primary target
-				 */
-				drr_safe_inc(&state[devidx].usecount);
-				result = nh;
-			} else {
-				int count =
-					atomic_read(&state[devidx].usecount);
-
-				if (min_usecount == -1 ||
-				    count < min_usecount) {
-					cur_min = nh;
-					cur_min_devidx = devidx;
-					min_usecount = count;
-				}
-			}
-		}
-	}
-
-	if (!result) {
-		if (cur_min) {
-			drr_safe_inc(&state[cur_min_devidx].usecount);
-			result = cur_min;
-		} else {
-			result = first;
-		}
-	}
-
-	*rp = result;
-}
-
-static struct ip_mp_alg_ops drr_ops = {
-	.mp_alg_select_route	=	drr_select_route,
-};
-
-static int __init drr_init(void)
-{
-	int err = register_netdevice_notifier(&drr_dev_notifier);
-
-	if (err)
-		return err;
-
-	err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
-	if (err)
-		goto fail;
-
-	return 0;
-
-fail:
-	unregister_netdevice_notifier(&drr_dev_notifier);
-	return err;
-}
-
-static void __exit drr_exit(void)
-{
-	unregister_netdevice_notifier(&drr_dev_notifier);
-	multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
-}
-
-module_init(drr_init);
-module_exit(drr_exit);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c
deleted file mode 100644
index c312785..0000000
--- a/net/ipv4/multipath_random.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- *              Random policy for multipath.
- *
- *
- * Version:	$Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $
- *
- * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/mroute.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/raw.h>
-#include <linux/notifier.h>
-#include <linux/if_arp.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/ipip.h>
-#include <net/checksum.h>
-#include <net/ip_mp_alg.h>
-
-#define MULTIPATH_MAX_CANDIDATES 40
-
-static void random_select_route(const struct flowi *flp,
-				struct rtable *first,
-				struct rtable **rp)
-{
-	struct rtable *rt;
-	struct rtable *decision;
-	unsigned char candidate_count = 0;
-
-	/* count all candidate */
-	for (rt = rcu_dereference(first); rt;
-	     rt = rcu_dereference(rt->u.dst.rt_next)) {
-		if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
-		    multipath_comparekeys(&rt->fl, flp))
-			++candidate_count;
-	}
-
-	/* choose a random candidate */
-	decision = first;
-	if (candidate_count > 1) {
-		unsigned char i = 0;
-		unsigned char candidate_no = (unsigned char)
-			(random32() % candidate_count);
-
-		/* find chosen candidate and adjust GC data for all candidates
-		 * to ensure they stay in cache
-		 */
-		for (rt = first; rt; rt = rt->u.dst.rt_next) {
-			if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
-			    multipath_comparekeys(&rt->fl, flp)) {
-				rt->u.dst.lastuse = jiffies;
-
-				if (i == candidate_no)
-					decision = rt;
-
-				if (i >= candidate_count)
-					break;
-
-				i++;
-			}
-		}
-	}
-
-	decision->u.dst.__use++;
-	*rp = decision;
-}
-
-static struct ip_mp_alg_ops random_ops = {
-	.mp_alg_select_route	=	random_select_route,
-};
-
-static int __init random_init(void)
-{
-	return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM);
-}
-
-static void __exit random_exit(void)
-{
-	multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM);
-}
-
-module_init(random_init);
-module_exit(random_exit);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
deleted file mode 100644
index 0ad2252..0000000
--- a/net/ipv4/multipath_rr.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- *              Round robin policy for multipath.
- *
- *
- * Version:	$Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $
- *
- * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/mroute.h>
-#include <linux/init.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/raw.h>
-#include <linux/notifier.h>
-#include <linux/if_arp.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/ipip.h>
-#include <net/checksum.h>
-#include <net/ip_mp_alg.h>
-
-static void rr_select_route(const struct flowi *flp,
-			    struct rtable *first, struct rtable **rp)
-{
-	struct rtable *nh, *result, *min_use_cand = NULL;
-	int min_use = -1;
-
-	/* 1. make sure all alt. nexthops have the same GC related data
-	 * 2. determine the new candidate to be returned
-	 */
-	result = NULL;
-	for (nh = rcu_dereference(first); nh;
-	     nh = rcu_dereference(nh->u.dst.rt_next)) {
-		if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
-		    multipath_comparekeys(&nh->fl, flp)) {
-			nh->u.dst.lastuse = jiffies;
-
-			if (min_use == -1 || nh->u.dst.__use < min_use) {
-				min_use = nh->u.dst.__use;
-				min_use_cand = nh;
-			}
-		}
-	}
-	result = min_use_cand;
-	if (!result)
-		result = first;
-
-	result->u.dst.__use++;
-	*rp = result;
-}
-
-static struct ip_mp_alg_ops rr_ops = {
-	.mp_alg_select_route	=	rr_select_route,
-};
-
-static int __init rr_init(void)
-{
-	return multipath_alg_register(&rr_ops, IP_MP_ALG_RR);
-}
-
-static void __exit rr_exit(void)
-{
-	multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR);
-}
-
-module_init(rr_init);
-module_exit(rr_exit);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
deleted file mode 100644
index 57c5036..0000000
--- a/net/ipv4/multipath_wrandom.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- *              Weighted random policy for multipath.
- *
- *
- * Version:	$Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
- *
- * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/mroute.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/raw.h>
-#include <linux/notifier.h>
-#include <linux/if_arp.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/ipip.h>
-#include <net/checksum.h>
-#include <net/ip_fib.h>
-#include <net/ip_mp_alg.h>
-
-#define MULTIPATH_STATE_SIZE 15
-
-struct multipath_candidate {
-	struct multipath_candidate	*next;
-	int				power;
-	struct rtable			*rt;
-};
-
-struct multipath_dest {
-	struct list_head	list;
-
-	const struct fib_nh	*nh_info;
-	__be32			netmask;
-	__be32			network;
-	unsigned char		prefixlen;
-
-	struct rcu_head		rcu;
-};
-
-struct multipath_bucket {
-	struct list_head	head;
-	spinlock_t		lock;
-};
-
-struct multipath_route {
-	struct list_head	list;
-
-	int			oif;
-	__be32			gw;
-	struct list_head	dests;
-
-	struct rcu_head		rcu;
-};
-
-/* state: primarily weight per route information */
-static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
-
-static unsigned char __multipath_lookup_weight(const struct flowi *fl,
-					       const struct rtable *rt)
-{
-	const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
-	struct multipath_route *r;
-	struct multipath_route *target_route = NULL;
-	struct multipath_dest *d;
-	int weight = 1;
-
-	/* lookup the weight information for a certain route */
-	rcu_read_lock();
-
-	/* find state entry for gateway or add one if necessary */
-	list_for_each_entry_rcu(r, &state[state_idx].head, list) {
-		if (r->gw == rt->rt_gateway &&
-		    r->oif == rt->idev->dev->ifindex) {
-			target_route = r;
-			break;
-		}
-	}
-
-	if (!target_route) {
-		/* this should not happen... but we are prepared */
-		printk( KERN_CRIT"%s: missing state for gateway: %u and " \
-			"device %d\n", __FUNCTION__, rt->rt_gateway,
-			rt->idev->dev->ifindex);
-		goto out;
-	}
-
-	/* find state entry for destination */
-	list_for_each_entry_rcu(d, &target_route->dests, list) {
-		__be32 targetnetwork = fl->fl4_dst &
-			inet_make_mask(d->prefixlen);
-
-		if ((targetnetwork & d->netmask) == d->network) {
-			weight = d->nh_info->nh_weight;
-			goto out;
-		}
-	}
-
-out:
-	rcu_read_unlock();
-	return weight;
-}
-
-static void wrandom_init_state(void)
-{
-	int i;
-
-	for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
-		INIT_LIST_HEAD(&state[i].head);
-		spin_lock_init(&state[i].lock);
-	}
-}
-
-static void wrandom_select_route(const struct flowi *flp,
-				 struct rtable *first,
-				 struct rtable **rp)
-{
-	struct rtable *rt;
-	struct rtable *decision;
-	struct multipath_candidate *first_mpc = NULL;
-	struct multipath_candidate *mpc, *last_mpc = NULL;
-	int power = 0;
-	int last_power;
-	int selector;
-	const size_t size_mpc = sizeof(struct multipath_candidate);
-
-	/* collect all candidates and identify their weights */
-	for (rt = rcu_dereference(first); rt;
-	     rt = rcu_dereference(rt->u.dst.rt_next)) {
-		if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
-		    multipath_comparekeys(&rt->fl, flp)) {
-			struct multipath_candidate* mpc =
-				(struct multipath_candidate*)
-				kmalloc(size_mpc, GFP_ATOMIC);
-
-			if (!mpc)
-				return;
-
-			power += __multipath_lookup_weight(flp, rt) * 10000;
-
-			mpc->power = power;
-			mpc->rt = rt;
-			mpc->next = NULL;
-
-			if (!first_mpc)
-				first_mpc = mpc;
-			else
-				last_mpc->next = mpc;
-
-			last_mpc = mpc;
-		}
-	}
-
-	/* choose a weighted random candidate */
-	decision = first;
-	selector = random32() % power;
-	last_power = 0;
-
-	/* select candidate, adjust GC data and cleanup local state */
-	decision = first;
-	last_mpc = NULL;
-	for (mpc = first_mpc; mpc; mpc = mpc->next) {
-		mpc->rt->u.dst.lastuse = jiffies;
-		if (last_power <= selector && selector < mpc->power)
-			decision = mpc->rt;
-
-		last_power = mpc->power;
-		kfree(last_mpc);
-		last_mpc = mpc;
-	}
-
-	/* concurrent __multipath_flush may lead to !last_mpc */
-	kfree(last_mpc);
-
-	decision->u.dst.__use++;
-	*rp = decision;
-}
-
-static void wrandom_set_nhinfo(__be32 network,
-			       __be32 netmask,
-			       unsigned char prefixlen,
-			       const struct fib_nh *nh)
-{
-	const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
-	struct multipath_route *r, *target_route = NULL;
-	struct multipath_dest *d, *target_dest = NULL;
-
-	/* store the weight information for a certain route */
-	spin_lock_bh(&state[state_idx].lock);
-
-	/* find state entry for gateway or add one if necessary */
-	list_for_each_entry_rcu(r, &state[state_idx].head, list) {
-		if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
-			target_route = r;
-			break;
-		}
-	}
-
-	if (!target_route) {
-		const size_t size_rt = sizeof(struct multipath_route);
-		target_route = (struct multipath_route *)
-			kmalloc(size_rt, GFP_ATOMIC);
-
-		target_route->gw = nh->nh_gw;
-		target_route->oif = nh->nh_oif;
-		memset(&target_route->rcu, 0, sizeof(struct rcu_head));
-		INIT_LIST_HEAD(&target_route->dests);
-
-		list_add_rcu(&target_route->list, &state[state_idx].head);
-	}
-
-	/* find state entry for destination or add one if necessary */
-	list_for_each_entry_rcu(d, &target_route->dests, list) {
-		if (d->nh_info == nh) {
-			target_dest = d;
-			break;
-		}
-	}
-
-	if (!target_dest) {
-		const size_t size_dst = sizeof(struct multipath_dest);
-		target_dest = (struct multipath_dest*)
-			kmalloc(size_dst, GFP_ATOMIC);
-
-		target_dest->nh_info = nh;
-		target_dest->network = network;
-		target_dest->netmask = netmask;
-		target_dest->prefixlen = prefixlen;
-		memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
-
-		list_add_rcu(&target_dest->list, &target_route->dests);
-	}
-	/* else: we already stored this info for another destination =>
-	 * we are finished
-	 */
-
-	spin_unlock_bh(&state[state_idx].lock);
-}
-
-static void __multipath_free(struct rcu_head *head)
-{
-	struct multipath_route *rt = container_of(head, struct multipath_route,
-						  rcu);
-	kfree(rt);
-}
-
-static void __multipath_free_dst(struct rcu_head *head)
-{
-	struct multipath_dest *dst = container_of(head,
-						  struct multipath_dest,
-						  rcu);
-	kfree(dst);
-}
-
-static void wrandom_flush(void)
-{
-	int i;
-
-	/* defere delete to all entries */
-	for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
-		struct multipath_route *r;
-
-		spin_lock_bh(&state[i].lock);
-		list_for_each_entry_rcu(r, &state[i].head, list) {
-			struct multipath_dest *d;
-			list_for_each_entry_rcu(d, &r->dests, list) {
-				list_del_rcu(&d->list);
-				call_rcu(&d->rcu,
-					 __multipath_free_dst);
-			}
-			list_del_rcu(&r->list);
-			call_rcu(&r->rcu,
-				 __multipath_free);
-		}
-
-		spin_unlock_bh(&state[i].lock);
-	}
-}
-
-static struct ip_mp_alg_ops wrandom_ops = {
-	.mp_alg_select_route	=	wrandom_select_route,
-	.mp_alg_flush		=	wrandom_flush,
-	.mp_alg_set_nhinfo	=	wrandom_set_nhinfo,
-};
-
-static int __init wrandom_init(void)
-{
-	wrandom_init_state();
-
-	return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
-}
-
-static void __exit wrandom_exit(void)
-{
-	multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
-}
-
-module_init(wrandom_init);
-module_exit(wrandom_exit);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 29ca63e..8528502 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -101,7 +101,6 @@
 #include <net/tcp.h>
 #include <net/icmp.h>
 #include <net/xfrm.h>
-#include <net/ip_mp_alg.h>
 #include <net/netevent.h>
 #include <net/rtnetlink.h>
 #ifdef CONFIG_SYSCTL
@@ -495,13 +494,11 @@ static const struct file_operations rt_cpu_seq_fops = {
 
 static __inline__ void rt_free(struct rtable *rt)
 {
-	multipath_remove(rt);
 	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
 }
 
 static __inline__ void rt_drop(struct rtable *rt)
 {
-	multipath_remove(rt);
 	ip_rt_put(rt);
 	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
 }
@@ -574,52 +571,6 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 		(fl1->iif ^ fl2->iif)) == 0;
 }
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
-						struct rtable *expentry,
-						int *removed_count)
-{
-	int passedexpired = 0;
-	struct rtable **nextstep = NULL;
-	struct rtable **rthp = chain_head;
-	struct rtable *rth;
-
-	if (removed_count)
-		*removed_count = 0;
-
-	while ((rth = *rthp) != NULL) {
-		if (rth == expentry)
-			passedexpired = 1;
-
-		if (((*rthp)->u.dst.flags & DST_BALANCED) != 0  &&
-		    compare_keys(&(*rthp)->fl, &expentry->fl)) {
-			if (*rthp == expentry) {
-				*rthp = rth->u.dst.rt_next;
-				continue;
-			} else {
-				*rthp = rth->u.dst.rt_next;
-				rt_free(rth);
-				if (removed_count)
-					++(*removed_count);
-			}
-		} else {
-			if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
-			    passedexpired && !nextstep)
-				nextstep = &rth->u.dst.rt_next;
-
-			rthp = &rth->u.dst.rt_next;
-		}
-	}
-
-	rt_free(expentry);
-	if (removed_count)
-		++(*removed_count);
-
-	return nextstep;
-}
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-
-
 /* This runs via a timer and thus is always in BH context. */
 static void rt_check_expire(unsigned long dummy)
 {
@@ -658,22 +609,8 @@ static void rt_check_expire(unsigned long dummy)
 			}
 
 			/* Cleanup aged off entries. */
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-			/* remove all related balanced entries if necessary */
-			if (rth->u.dst.flags & DST_BALANCED) {
-				rthp = rt_remove_balanced_route(
-					&rt_hash_table[i].chain,
-					rth, NULL);
-				if (!rthp)
-					break;
-			} else {
-				*rthp = rth->u.dst.rt_next;
-				rt_free(rth);
-			}
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
 			*rthp = rth->u.dst.rt_next;
 			rt_free(rth);
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
 		}
 		spin_unlock(rt_hash_lock_addr(i));
 
@@ -721,9 +658,6 @@ void rt_cache_flush(int delay)
 	if (delay < 0)
 		delay = ip_rt_min_delay;
 
-	/* flush existing multipath state*/
-	multipath_flush();
-
 	spin_lock_bh(&rt_flush_lock);
 
 	if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
@@ -842,30 +776,9 @@ static int rt_garbage_collect(void)
 					rthp = &rth->u.dst.rt_next;
 					continue;
 				}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-				/* remove all related balanced entries
-				 * if necessary
-				 */
-				if (rth->u.dst.flags & DST_BALANCED) {
-					int r;
-
-					rthp = rt_remove_balanced_route(
-						&rt_hash_table[k].chain,
-						rth,
-						&r);
-					goal -= r;
-					if (!rthp)
-						break;
-				} else {
-					*rthp = rth->u.dst.rt_next;
-					rt_free(rth);
-					goal--;
-				}
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
 				*rthp = rth->u.dst.rt_next;
 				rt_free(rth);
 				goal--;
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
 			}
 			spin_unlock_bh(rt_hash_lock_addr(k));
 			if (goal <= 0)
@@ -939,12 +852,7 @@ restart:
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-		if (!(rth->u.dst.flags & DST_BALANCED) &&
-		    compare_keys(&rth->fl, &rt->fl)) {
-#else
 		if (compare_keys(&rth->fl, &rt->fl)) {
-#endif
 			/* Put it first */
 			*rthp = rth->u.dst.rt_next;
 			/*
@@ -1774,10 +1682,6 @@ static inline int __mkroute_input(struct sk_buff *skb,
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (res->fi->fib_nhs > 1)
-		rth->u.dst.flags |= DST_BALANCED;
-#endif
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
 		rth->u.dst.flags |= DST_NOPOLICY;
 	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
@@ -1812,11 +1716,11 @@ static inline int __mkroute_input(struct sk_buff *skb,
 	return err;
 }
 
-static inline int ip_mkroute_input_def(struct sk_buff *skb,
-				       struct fib_result* res,
-				       const struct flowi *fl,
-				       struct in_device *in_dev,
-				       __be32 daddr, __be32 saddr, u32 tos)
+static inline int ip_mkroute_input(struct sk_buff *skb,
+				   struct fib_result* res,
+				   const struct flowi *fl,
+				   struct in_device *in_dev,
+				   __be32 daddr, __be32 saddr, u32 tos)
 {
 	struct rtable* rth = NULL;
 	int err;
@@ -1837,63 +1741,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
 	return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
 }
 
-static inline int ip_mkroute_input(struct sk_buff *skb,
-				   struct fib_result* res,
-				   const struct flowi *fl,
-				   struct in_device *in_dev,
-				   __be32 daddr, __be32 saddr, u32 tos)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	struct rtable* rth = NULL, *rtres;
-	unsigned char hop, hopcount;
-	int err = -EINVAL;
-	unsigned int hash;
-
-	if (res->fi)
-		hopcount = res->fi->fib_nhs;
-	else
-		hopcount = 1;
-
-	/* distinguish between multipath and singlepath */
-	if (hopcount < 2)
-		return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
-					    saddr, tos);
-
-	/* add all alternatives to the routing cache */
-	for (hop = 0; hop < hopcount; hop++) {
-		res->nh_sel = hop;
-
-		/* put reference to previous result */
-		if (hop)
-			ip_rt_put(rtres);
-
-		/* create a routing cache entry */
-		err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
-				      &rth);
-		if (err)
-			return err;
-
-		/* put it into the cache */
-		hash = rt_hash(daddr, saddr, fl->iif);
-		err = rt_intern_hash(hash, rth, &rtres);
-		if (err)
-			return err;
-
-		/* forward hop information to multipath impl. */
-		multipath_set_nhinfo(rth,
-				     FIB_RES_NETWORK(*res),
-				     FIB_RES_NETMASK(*res),
-				     res->prefixlen,
-				     &FIB_RES_NH(*res));
-	}
-	skb->dst = &rtres->u.dst;
-	return err;
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED  */
-	return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED  */
-}
-
-
 /*
  *	NOTE. We drop all the packets that has local source
  *	addresses, because every properly looped back packet
@@ -2211,13 +2058,6 @@ static inline int __mkroute_output(struct rtable **result,
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (res->fi) {
-		rth->rt_multipath_alg = res->fi->fib_mp_alg;
-		if (res->fi->fib_nhs > 1)
-			rth->u.dst.flags |= DST_BALANCED;
-	}
-#endif
 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
 		rth->u.dst.flags |= DST_NOXFRM;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
@@ -2277,12 +2117,12 @@ static inline int __mkroute_output(struct rtable **result,
 	return err;
 }
 
-static inline int ip_mkroute_output_def(struct rtable **rp,
-					struct fib_result* res,
-					const struct flowi *fl,
-					const struct flowi *oldflp,
-					struct net_device *dev_out,
-					unsigned flags)
+static inline int ip_mkroute_output(struct rtable **rp,
+				    struct fib_result* res,
+				    const struct flowi *fl,
+				    const struct flowi *oldflp,
+				    struct net_device *dev_out,
+				    unsigned flags)
 {
 	struct rtable *rth = NULL;
 	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
@@ -2295,68 +2135,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp,
 	return err;
 }
 
-static inline int ip_mkroute_output(struct rtable** rp,
-				    struct fib_result* res,
-				    const struct flowi *fl,
-				    const struct flowi *oldflp,
-				    struct net_device *dev_out,
-				    unsigned flags)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	unsigned char hop;
-	unsigned hash;
-	int err = -EINVAL;
-	struct rtable *rth = NULL;
-
-	if (res->fi && res->fi->fib_nhs > 1) {
-		unsigned char hopcount = res->fi->fib_nhs;
-
-		for (hop = 0; hop < hopcount; hop++) {
-			struct net_device *dev2nexthop;
-
-			res->nh_sel = hop;
-
-			/* hold a work reference to the output device */
-			dev2nexthop = FIB_RES_DEV(*res);
-			dev_hold(dev2nexthop);
-
-			/* put reference to previous result */
-			if (hop)
-				ip_rt_put(*rp);
-
-			err = __mkroute_output(&rth, res, fl, oldflp,
-					       dev2nexthop, flags);
-
-			if (err != 0)
-				goto cleanup;
-
-			hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src,
-					oldflp->oif);
-			err = rt_intern_hash(hash, rth, rp);
-
-			/* forward hop information to multipath impl. */
-			multipath_set_nhinfo(rth,
-					     FIB_RES_NETWORK(*res),
-					     FIB_RES_NETMASK(*res),
-					     res->prefixlen,
-					     &FIB_RES_NH(*res));
-		cleanup:
-			/* release work reference to output device */
-			dev_put(dev2nexthop);
-
-			if (err != 0)
-				return err;
-		}
-		return err;
-	} else {
-		return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
-					     flags);
-	}
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-	return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
-#endif
-}
-
 /*
  * Major route resolver routine.
  */
@@ -2570,17 +2348,6 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
-
-			/* check for multipath routes and choose one if
-			 * necessary
-			 */
-			if (multipath_select_route(flp, rth, rp)) {
-				dst_hold(&(*rp)->u.dst);
-				RT_CACHE_STAT_INC(out_hit);
-				rcu_read_unlock_bh();
-				return 0;
-			}
-
 			rth->u.dst.lastuse = jiffies;
 			dst_hold(&rth->u.dst);
 			rth->u.dst.__use++;
@@ -2729,10 +2496,6 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	if (rt->u.dst.tclassid)
 		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
 #endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
-		NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
-#endif
 	if (rt->fl.iif)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
 	else if (rt->rt_src != rt->fl.fl4_src)
-- 
cgit v0.10.2


From f3ec75f627c746cfe460482d38a33b06a84d038f Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Sun, 10 Jun 2007 17:24:20 -0700
Subject: [TIPC]: Improved support for Ethernet traffic filtering

This patch simplifies TIPC's Ethernet receive routine to take
advantage of information already present in each incoming sk_buff
indicating whether the packet was explicitly sent to the interface,
has been broadcast to all interfaces, or was picked up because the
interface is in promiscuous mode.

This new approach also fixes the problem of TIPC accepting unwanted
traffic through UML's multicast-based Ethernet interfaces (which
deliver traffic in a promiscuous manner even if the interface is
not configured to be promiscuous).

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Jon Paul Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 77d2d9c..711ca4b 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -1,8 +1,8 @@
 /*
  * net/tipc/eth_media.c: Ethernet bearer support for TIPC
  *
- * Copyright (c) 2001-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2001-2007, Ericsson AB
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -87,6 +87,9 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
 /**
  * recv_msg - handle incoming TIPC message from an Ethernet interface
  *
+ * Accept only packets explicitly sent to this node, or broadcast packets;
+ * ignores packets sent using Ethernet multicast, and traffic sent to other
+ * nodes (which can happen if interface is running in promiscuous mode).
  * Routine truncates any Ethernet padding/CRC appended to the message,
  * and ensures message size matches actual length
  */
@@ -98,9 +101,7 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
 	u32 size;
 
 	if (likely(eb_ptr->bearer)) {
-	       if (likely(!dev->promiscuity) ||
-		   !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) ||
-		   !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) {
+		if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
 			size = msg_size((struct tipc_msg *)buf->data);
 			skb_trim(buf, size);
 			if (likely(buf->len == size)) {
-- 
cgit v0.10.2


From 5eee6a6dc945acc5bf4da12956b2f698bbb102b9 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Sun, 10 Jun 2007 17:24:55 -0700
Subject: [TIPC]: Use standard socket "not implemented" routines

This patch modifies TIPC's socket API to utilize existing
generic routines to indicate unsupported operations, rather
than adding similar TIPC-specific routines.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Jon Paul Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 45832fb..ac7f2aa 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,8 +1,8 @@
 /*
  * net/tipc/socket.c: TIPC socket API
  *
- * Copyright (c) 2001-2006, Ericsson AB
- * Copyright (c) 2004-2006, Wind River Systems
+ * Copyright (c) 2001-2007, Ericsson AB
+ * Copyright (c) 2004-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -1600,33 +1600,6 @@ static int getsockopt(struct socket *sock,
 }
 
 /**
- * Placeholders for non-implemented functionality
- *
- * Returns error code (POSIX-compliant where defined)
- */
-
-static int ioctl(struct socket *s, u32 cmd, unsigned long arg)
-{
-	return -EINVAL;
-}
-
-static int no_mmap(struct file *file, struct socket *sock,
-		   struct vm_area_struct *vma)
-{
-	return -EINVAL;
-}
-static ssize_t no_sendpage(struct socket *sock, struct page *page,
-			   int offset, size_t size, int flags)
-{
-	return -EINVAL;
-}
-
-static int no_skpair(struct socket *s1, struct socket *s2)
-{
-	return -EOPNOTSUPP;
-}
-
-/**
  * Protocol switches for the various types of TIPC sockets
  */
 
@@ -1636,19 +1609,19 @@ static struct proto_ops msg_ops = {
 	.release	= release,
 	.bind		= bind,
 	.connect	= connect,
-	.socketpair	= no_skpair,
+	.socketpair	= sock_no_socketpair,
 	.accept		= accept,
 	.getname	= get_name,
 	.poll		= poll,
-	.ioctl		= ioctl,
+	.ioctl		= sock_no_ioctl,
 	.listen		= listen,
 	.shutdown	= shutdown,
 	.setsockopt	= setsockopt,
 	.getsockopt	= getsockopt,
 	.sendmsg	= send_msg,
 	.recvmsg	= recv_msg,
-	.mmap		= no_mmap,
-	.sendpage	= no_sendpage
+        .mmap		= sock_no_mmap,
+        .sendpage	= sock_no_sendpage
 };
 
 static struct proto_ops packet_ops = {
@@ -1657,19 +1630,19 @@ static struct proto_ops packet_ops = {
 	.release	= release,
 	.bind		= bind,
 	.connect	= connect,
-	.socketpair	= no_skpair,
+	.socketpair	= sock_no_socketpair,
 	.accept		= accept,
 	.getname	= get_name,
 	.poll		= poll,
-	.ioctl		= ioctl,
+	.ioctl		= sock_no_ioctl,
 	.listen		= listen,
 	.shutdown	= shutdown,
 	.setsockopt	= setsockopt,
 	.getsockopt	= getsockopt,
 	.sendmsg	= send_packet,
 	.recvmsg	= recv_msg,
-	.mmap		= no_mmap,
-	.sendpage	= no_sendpage
+        .mmap		= sock_no_mmap,
+        .sendpage	= sock_no_sendpage
 };
 
 static struct proto_ops stream_ops = {
@@ -1678,19 +1651,19 @@ static struct proto_ops stream_ops = {
 	.release	= release,
 	.bind		= bind,
 	.connect	= connect,
-	.socketpair	= no_skpair,
+	.socketpair	= sock_no_socketpair,
 	.accept		= accept,
 	.getname	= get_name,
 	.poll		= poll,
-	.ioctl		= ioctl,
+	.ioctl		= sock_no_ioctl,
 	.listen		= listen,
 	.shutdown	= shutdown,
 	.setsockopt	= setsockopt,
 	.getsockopt	= getsockopt,
 	.sendmsg	= send_stream,
 	.recvmsg	= recv_stream,
-	.mmap		= no_mmap,
-	.sendpage	= no_sendpage
+        .mmap		= sock_no_mmap,
+        .sendpage	= sock_no_sendpage
 };
 
 static struct net_proto_family tipc_family_ops = {
-- 
cgit v0.10.2


From 05646c91109bfd129361d57dc5d98464ab6f6578 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Sun, 10 Jun 2007 17:25:24 -0700
Subject: [TIPC]: Optimize stream send routine to avoid fragmentation

This patch enhances TIPC's stream socket send routine so that
it avoids transmitting data in chunks that require fragmentation
and reassembly, thereby improving performance at both the
sending and receiving ends of the connection.

The "maximum packet size" hint that records MTU info allows
the socket to decide how big a chunk it should send; in the
event that the hint has become stale, fragmentation may still
occur, but the data will be passed correctly and the hint will
be updated in time for the following send.  Note: The 66060 byte
pseudo-MTU used for intra-node connections requires the send
routine to perform an additional check to ensure it does not
exceed TIPC's limit of 66000 bytes of user data per chunk.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Jon Paul Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h
index 333bba6..cfc4ba4 100644
--- a/include/net/tipc/tipc_port.h
+++ b/include/net/tipc/tipc_port.h
@@ -1,8 +1,8 @@
 /*
  * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports
  * 
- * Copyright (c) 1994-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 1994-2007, Ericsson AB
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,7 @@
  * @conn_unacked: number of unacknowledged messages received from peer port
  * @published: non-zero if port has one or more associated names
  * @congested: non-zero if cannot send because of link or port congestion
+ * @max_pkt: maximum packet size "hint" used when building messages sent by port
  * @ref: unique reference to port in TIPC object registry
  * @phdr: preformatted message header used when sending messages
  */
@@ -68,6 +69,7 @@ struct tipc_port {
 	u32 conn_unacked;
 	int published;
 	u32 congested;
+	u32 max_pkt;
 	u32 ref;
 	struct tipc_msg phdr;
 };
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2124f32..5adfdfd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1,8 +1,8 @@
 /*
  * net/tipc/link.c: TIPC link code
  *
- * Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2004-2006, Wind River Systems
+ * Copyright (c) 1996-2007, Ericsson AB
+ * Copyright (c) 2004-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -1260,7 +1260,7 @@ again:
 	 * (Must not hold any locks while building message.)
 	 */
 
-	res = msg_build(hdr, msg_sect, num_sect, sender->max_pkt,
+	res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt,
 			!sender->user_port, &buf);
 
 	read_lock_bh(&tipc_net_lock);
@@ -1271,7 +1271,7 @@ again:
 		if (likely(l_ptr)) {
 			if (likely(buf)) {
 				res = link_send_buf_fast(l_ptr, buf,
-							 &sender->max_pkt);
+							 &sender->publ.max_pkt);
 				if (unlikely(res < 0))
 					buf_discard(buf);
 exit:
@@ -1299,12 +1299,12 @@ exit:
 			 * then re-try fast path or fragment the message
 			 */
 
-			sender->max_pkt = link_max_pkt(l_ptr);
+			sender->publ.max_pkt = link_max_pkt(l_ptr);
 			tipc_node_unlock(node);
 			read_unlock_bh(&tipc_net_lock);
 
 
-			if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
+			if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt)
 				goto again;
 
 			return link_send_sections_long(sender, msg_sect,
@@ -1357,7 +1357,7 @@ static int link_send_sections_long(struct port *sender,
 
 again:
 	fragm_no = 1;
-	max_pkt = sender->max_pkt - INT_H_SIZE;
+	max_pkt = sender->publ.max_pkt - INT_H_SIZE;
 		/* leave room for tunnel header in case of link changeover */
 	fragm_sz = max_pkt - INT_H_SIZE;
 		/* leave room for fragmentation header in each fragment */
@@ -1463,7 +1463,7 @@ error:
 			goto reject;
 		}
 		if (link_max_pkt(l_ptr) < max_pkt) {
-			sender->max_pkt = link_max_pkt(l_ptr);
+			sender->publ.max_pkt = link_max_pkt(l_ptr);
 			tipc_node_unlock(node);
 			for (; buf_chain; buf_chain = buf) {
 				buf = buf_chain->next;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index bcd5da0..5d2b9ce 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -1,8 +1,8 @@
 /*
  * net/tipc/port.c: TIPC port code
  *
- * Copyright (c) 1992-2006, Ericsson AB
- * Copyright (c) 2004-2005, Wind River Systems
+ * Copyright (c) 1992-2007, Ericsson AB
+ * Copyright (c) 2004-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -239,6 +239,8 @@ u32 tipc_createport_raw(void *usr_handle,
 	}
 
 	tipc_port_lock(ref);
+	p_ptr->publ.usr_handle = usr_handle;
+	p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
 	p_ptr->publ.ref = ref;
 	msg = &p_ptr->publ.phdr;
 	msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0);
@@ -248,11 +250,9 @@ u32 tipc_createport_raw(void *usr_handle,
 	msg_set_importance(msg,importance);
 	p_ptr->last_in_seqno = 41;
 	p_ptr->sent = 1;
-	p_ptr->publ.usr_handle = usr_handle;
 	INIT_LIST_HEAD(&p_ptr->wait_list);
 	INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
 	p_ptr->congested_link = NULL;
-	p_ptr->max_pkt = MAX_PKT_DEFAULT;
 	p_ptr->dispatcher = dispatcher;
 	p_ptr->wakeup = wakeup;
 	p_ptr->user_port = NULL;
@@ -1243,7 +1243,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
 	res = TIPC_OK;
 exit:
 	tipc_port_unlock(p_ptr);
-	p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref);
+	p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref);
 	return res;
 }
 
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 7ef4d64..e5f8c16 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -1,8 +1,8 @@
 /*
  * net/tipc/port.h: Include file for TIPC port code
  *
- * Copyright (c) 1994-2006, Ericsson AB
- * Copyright (c) 2004-2005, Wind River Systems
+ * Copyright (c) 1994-2007, Ericsson AB
+ * Copyright (c) 2004-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -81,7 +81,6 @@ struct user_port {
  * @acked:
  * @publications: list of publications for port
  * @pub_count: total # of publications port has made during its lifetime
- * @max_pkt: maximum packet size "hint" used when building messages sent by port
  * @probing_state:
  * @probing_interval:
  * @last_in_seqno:
@@ -102,7 +101,6 @@ struct port {
 	u32 acked;
 	struct list_head publications;
 	u32 pub_count;
-	u32 max_pkt;
 	u32 probing_state;
 	u32 probing_interval;
 	u32 last_in_seqno;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ac7f2aa..4a8f37f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -607,23 +607,24 @@ exit:
 static int send_stream(struct kiocb *iocb, struct socket *sock,
 		       struct msghdr *m, size_t total_len)
 {
+	struct tipc_port *tport;
 	struct msghdr my_msg;
 	struct iovec my_iov;
 	struct iovec *curr_iov;
 	int curr_iovlen;
 	char __user *curr_start;
+	u32 hdr_size;
 	int curr_left;
 	int bytes_to_send;
 	int bytes_sent;
 	int res;
 
-	if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE))
-		return send_packet(iocb, sock, m, total_len);
-
-	/* Can only send large data streams if already connected */
+	/* Handle special cases where there is no connection */
 
 	if (unlikely(sock->state != SS_CONNECTED)) {
-		if (sock->state == SS_DISCONNECTING)
+		if (sock->state == SS_UNCONNECTED)
+			return send_packet(iocb, sock, m, total_len);
+		else if (sock->state == SS_DISCONNECTING)
 			return -EPIPE;
 		else
 			return -ENOTCONN;
@@ -648,17 +649,25 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
 	my_msg.msg_name = NULL;
 	bytes_sent = 0;
 
+	tport = tipc_sk(sock->sk)->p;
+	hdr_size = msg_hdr_sz(&tport->phdr);
+
 	while (curr_iovlen--) {
 		curr_start = curr_iov->iov_base;
 		curr_left = curr_iov->iov_len;
 
 		while (curr_left) {
-			bytes_to_send = (curr_left < TIPC_MAX_USER_MSG_SIZE)
-				? curr_left : TIPC_MAX_USER_MSG_SIZE;
+			bytes_to_send = tport->max_pkt - hdr_size;
+			if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
+				bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
+			if (curr_left < bytes_to_send)
+				bytes_to_send = curr_left;
 			my_iov.iov_base = curr_start;
 			my_iov.iov_len = bytes_to_send;
 			if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) {
-				return bytes_sent ? bytes_sent : res;
+				if (bytes_sent != 0)
+					res = bytes_sent;
+				return res;
 			}
 			curr_left -= bytes_to_send;
 			curr_start += bytes_to_send;
-- 
cgit v0.10.2


From c716a81ab946c68a8d84022ee32eb14674e72650 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Sun, 10 Jun 2007 17:31:24 -0700
Subject: [NET_SCHED]: Cleanup readability of qdisc restart

Over the years this code has gotten hairier, resulting in many long
discussions over long summer days and patches that get it wrong.
This patch helps tame that code so normal people will understand it.

Thanks to Thomas Graf, Peter J. Waskiewicz Jr, and Patrick McHardy
for their valuable reviews.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f4d3448..9461e8a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -34,6 +34,9 @@
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
+#define SCHED_TX_DROP -2
+#define SCHED_TX_QUEUE -3
+
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
@@ -59,7 +62,74 @@ void qdisc_unlock_tree(struct net_device *dev)
 	spin_unlock_bh(&dev->queue_lock);
 }
 
+static inline int qdisc_qlen(struct Qdisc *q)
+{
+	BUG_ON((int) q->q.qlen < 0);
+	return q->q.qlen;
+}
+
+static inline int handle_dev_cpu_collision(struct net_device *dev)
+{
+	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "Dead loop on netdevice %s, fix it urgently!\n",
+			       dev->name);
+		return SCHED_TX_DROP;
+	}
+	__get_cpu_var(netdev_rx_stat).cpu_collision++;
+	return SCHED_TX_QUEUE;
+}
+
+static inline int
+do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+
+	if (unlikely(skb->next))
+		dev->gso_skb = skb;
+	else
+		q->ops->requeue(skb, q);
+	/* XXX: Could netif_schedule fail? Or is the fact we are
+	 * requeueing imply the hardware path is closed
+	 * and even if we fail, some interrupt will wake us
+	 */
+	netif_schedule(dev);
+	return 0;
+}
+
+static inline struct sk_buff *
+try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
+{
+	struct sk_buff *skb = dev->gso_skb;
+
+	if (skb)
+		dev->gso_skb = NULL;
+	else
+		skb = q->dequeue(q);
+
+	return skb;
+}
+
+static inline int
+tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+	int ret = handle_dev_cpu_collision(dev);
+
+	if (ret == SCHED_TX_DROP) {
+		kfree_skb(skb);
+		return qdisc_qlen(q);
+	}
+
+	return do_dev_requeue(skb, dev, q);
+}
+
+
 /*
+   NOTE: Called under dev->queue_lock with locally disabled BH.
+
+   __LINK_STATE_QDISC_RUNNING guarantees only one CPU
+   can enter this region at a time.
+
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
 
@@ -67,116 +137,65 @@ void qdisc_unlock_tree(struct net_device *dev)
 
    dev->queue_lock and netif_tx_lock are mutually exclusive,
    if one is grabbed, another must be free.
- */
 
+   Multiple CPUs may contend for the two locks.
 
-/* Kick device.
+   Note, that this procedure can be called by a watchdog timer
 
+   Returns to the caller:
    Returns:  0  - queue is empty or throttled.
 	    >0  - queue is not empty.
 
-   NOTE: Called under dev->queue_lock with locally disabled BH.
 */
 
 static inline int qdisc_restart(struct net_device *dev)
 {
 	struct Qdisc *q = dev->qdisc;
+	unsigned lockless = (dev->features & NETIF_F_LLTX);
 	struct sk_buff *skb;
+	int ret;
 
-	/* Dequeue packet */
-	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
-		unsigned nolock = (dev->features & NETIF_F_LLTX);
-
-		dev->gso_skb = NULL;
+	skb = try_get_tx_pkt(dev, q);
+	if (skb == NULL)
+		return 0;
 
-		/*
-		 * When the driver has LLTX set it does its own locking
-		 * in start_xmit. No need to add additional overhead by
-		 * locking again. These checks are worth it because
-		 * even uncongested locks can be quite expensive.
-		 * The driver can do trylock like here too, in case
-		 * of lock congestion it should return -1 and the packet
-		 * will be requeued.
-		 */
-		if (!nolock) {
-			if (!netif_tx_trylock(dev)) {
-			collision:
-				/* So, someone grabbed the driver. */
-
-				/* It may be transient configuration error,
-				   when hard_start_xmit() recurses. We detect
-				   it by checking xmit owner and drop the
-				   packet when deadloop is detected.
-				*/
-				if (dev->xmit_lock_owner == smp_processor_id()) {
-					kfree_skb(skb);
-					if (net_ratelimit())
-						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
-					goto out;
-				}
-				__get_cpu_var(netdev_rx_stat).cpu_collision++;
-				goto requeue;
-			}
-		}
+	/* we have a packet to send */
+	if (!lockless) {
+		if (!netif_tx_trylock(dev))
+			return tx_islocked(skb, dev, q);
+	}
+	/* all clear .. */
+	spin_unlock(&dev->queue_lock);
 
-		{
-			/* And release queue */
-			spin_unlock(&dev->queue_lock);
-
-			if (!netif_queue_stopped(dev)) {
-				int ret;
-
-				ret = dev_hard_start_xmit(skb, dev);
-				if (ret == NETDEV_TX_OK) {
-					if (!nolock) {
-						netif_tx_unlock(dev);
-					}
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto out;
-				}
-				if (ret == NETDEV_TX_LOCKED && nolock) {
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto collision;
-				}
-			}
+	ret = NETDEV_TX_BUSY;
+	if (!netif_queue_stopped(dev))
+		/* churn baby churn .. */
+		ret = dev_hard_start_xmit(skb, dev);
 
-			/* NETDEV_TX_BUSY - we need to requeue */
-			/* Release the driver */
-			if (!nolock) {
-				netif_tx_unlock(dev);
-			}
-			spin_lock(&dev->queue_lock);
-			q = dev->qdisc;
-		}
+	if (!lockless)
+		netif_tx_unlock(dev);
 
-		/* Device kicked us out :(
-		   This is possible in three cases:
-
-		   0. driver is locked
-		   1. fastroute is enabled
-		   2. device cannot determine busy state
-		      before start of transmission (f.e. dialout)
-		   3. device is buggy (ppp)
-		 */
-
-requeue:
-		if (unlikely(q == &noop_qdisc))
-			kfree_skb(skb);
-		else if (skb->next)
-			dev->gso_skb = skb;
-		else
-			q->ops->requeue(skb, q);
-		netif_schedule(dev);
-	}
-	return 0;
+	spin_lock(&dev->queue_lock);
 
-out:
-	BUG_ON((int) q->q.qlen < 0);
-	return q->q.qlen;
+	/* we need to refresh q because it may be invalid since
+	 * we dropped  dev->queue_lock earlier ...
+	 * So dont try to be clever grasshopper
+	 */
+	q = dev->qdisc;
+	/* most likely result, packet went ok */
+	if (ret == NETDEV_TX_OK)
+		return qdisc_qlen(q);
+	/* only for lockless drivers .. */
+	if (ret == NETDEV_TX_LOCKED && lockless)
+		return tx_islocked(skb, dev, q);
+
+	if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
+		printk(KERN_WARNING " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
+
+	return do_dev_requeue(skb, dev, q);
 }
 
+
 void __qdisc_run(struct net_device *dev)
 {
 	do {
-- 
cgit v0.10.2


From a7ab4b501f9b8a9dc4d5cee542db67b6ccd1088b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 10 Jun 2007 17:33:08 -0700
Subject: [TCPv4]: Improve BH latency in /proc/net/tcp

Currently the code for /proc/net/tcp disables BH while iterating
over the entire established hash table.  Even though we call
cond_resched_softirq for each entry, we still won't process
softirqs as regularly as we would otherwise do which results
in poor performance when the system is loaded near capacity.

This anomaly comes from the 2.4 code where this was all in a
single function and the local_bh_disable might have made sense
as a small optimisation.

The cost of each local_bh_disable is so small when compared
against the increased latency in keeping it disabled over a
large but mostly empty TCP established hash table that we
should just move it to the individual read_lock/read_unlock
calls as we do in inet_diag.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 354721d..3f5f742 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2045,10 +2045,7 @@ static void *established_get_first(struct seq_file *seq)
 		struct hlist_node *node;
 		struct inet_timewait_sock *tw;
 
-		/* We can reschedule _before_ having picked the target: */
-		cond_resched_softirq();
-
-		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
+		read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family) {
 				continue;
@@ -2065,7 +2062,7 @@ static void *established_get_first(struct seq_file *seq)
 			rc = tw;
 			goto out;
 		}
-		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
+		read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 	}
 out:
@@ -2092,14 +2089,11 @@ get_tw:
 			cur = tw;
 			goto out;
 		}
-		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
+		read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
-		/* We can reschedule between buckets: */
-		cond_resched_softirq();
-
 		if (++st->bucket < tcp_hashinfo.ehash_size) {
-			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
+			read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
 		} else {
 			cur = NULL;
@@ -2144,7 +2138,6 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 
 	if (!rc) {
 		inet_listen_unlock(&tcp_hashinfo);
-		local_bh_disable();
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 		rc	  = established_get_idx(seq, pos);
 	}
@@ -2177,7 +2170,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		rc = listening_get_next(seq, v);
 		if (!rc) {
 			inet_listen_unlock(&tcp_hashinfo);
-			local_bh_disable();
 			st->state = TCP_SEQ_STATE_ESTABLISHED;
 			rc	  = established_get_first(seq);
 		}
@@ -2209,8 +2201,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 	case TCP_SEQ_STATE_TIME_WAIT:
 	case TCP_SEQ_STATE_ESTABLISHED:
 		if (v)
-			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-		local_bh_enable();
+			read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
 		break;
 	}
 }
-- 
cgit v0.10.2


From 8de0a15483b357d0f0b821330ec84d1660cadc4e Mon Sep 17 00:00:00 2001
From: Ville Tervo <ville.tervo@nokia.com>
Date: Wed, 11 Jul 2007 09:23:41 +0200
Subject: [Bluetooth] Keep rfcomm_dev on the list until it is freed

This patch changes the RFCOMM TTY release process so that the TTY is kept
on the list until it is really freed. A new device flag is used to keep
track of released TTYs.

Signed-off-by: Ville Tervo <ville.tervo@nokia.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 3c563f0..25aa575 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -323,6 +323,7 @@ int  rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc
 #define RFCOMM_RELEASE_ONHUP  1
 #define RFCOMM_HANGUP_NOW     2
 #define RFCOMM_TTY_ATTACHED   3
+#define RFCOMM_TTY_RELEASED   4
 
 struct rfcomm_dev_req {
 	s16      dev_id;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index ba469b0..23ba61a 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -95,6 +95,10 @@ static void rfcomm_dev_destruct(struct rfcomm_dev *dev)
 
 	BT_DBG("dev %p dlc %p", dev, dlc);
 
+	write_lock_bh(&rfcomm_dev_lock);
+	list_del_init(&dev->list);
+	write_unlock_bh(&rfcomm_dev_lock);
+
 	rfcomm_dlc_lock(dlc);
 	/* Detach DLC if it's owned by this dev */
 	if (dlc->owner == dev)
@@ -156,8 +160,13 @@ static inline struct rfcomm_dev *rfcomm_dev_get(int id)
 	read_lock(&rfcomm_dev_lock);
 
 	dev = __rfcomm_dev_get(id);
-	if (dev)
-		rfcomm_dev_hold(dev);
+
+	if (dev) {
+		if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags))
+			dev = NULL;
+		else
+			rfcomm_dev_hold(dev);
+	}
 
 	read_unlock(&rfcomm_dev_lock);
 
@@ -265,6 +274,12 @@ out:
 
 	dev->tty_dev = tty_register_device(rfcomm_tty_driver, dev->id, NULL);
 
+	if (IS_ERR(dev->tty_dev)) {
+		list_del(&dev->list);
+		kfree(dev);
+		return PTR_ERR(dev->tty_dev);
+	}
+
 	return dev->id;
 }
 
@@ -272,10 +287,7 @@ static void rfcomm_dev_del(struct rfcomm_dev *dev)
 {
 	BT_DBG("dev %p", dev);
 
-	write_lock_bh(&rfcomm_dev_lock);
-	list_del_init(&dev->list);
-	write_unlock_bh(&rfcomm_dev_lock);
-
+	set_bit(RFCOMM_TTY_RELEASED, &dev->flags);
 	rfcomm_dev_put(dev);
 }
 
@@ -329,7 +341,7 @@ static int rfcomm_create_dev(struct sock *sk, void __user *arg)
 	if (copy_from_user(&req, arg, sizeof(req)))
 		return -EFAULT;
 
-	BT_DBG("sk %p dev_id %id flags 0x%x", sk, req.dev_id, req.flags);
+	BT_DBG("sk %p dev_id %d flags 0x%x", sk, req.dev_id, req.flags);
 
 	if (req.flags != NOCAP_FLAGS && !capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -370,7 +382,7 @@ static int rfcomm_release_dev(void __user *arg)
 	if (copy_from_user(&req, arg, sizeof(req)))
 		return -EFAULT;
 
-	BT_DBG("dev_id %id flags 0x%x", req.dev_id, req.flags);
+	BT_DBG("dev_id %d flags 0x%x", req.dev_id, req.flags);
 
 	if (!(dev = rfcomm_dev_get(req.dev_id)))
 		return -ENODEV;
@@ -419,6 +431,8 @@ static int rfcomm_get_dev_list(void __user *arg)
 
 	list_for_each(p, &rfcomm_dev_list) {
 		struct rfcomm_dev *dev = list_entry(p, struct rfcomm_dev, list);
+		if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags))
+			continue;
 		(di + n)->id      = dev->id;
 		(di + n)->flags   = dev->flags;
 		(di + n)->state   = dev->dlc->state;
-- 
cgit v0.10.2


From 8c7b7faaa630fef7f68d8728cee1cce398cc9697 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 10 Jul 2007 22:08:12 -0700
Subject: [NET]: Kill eth_copy_and_sum().

It hasn't "summed" anything in over 7 years, and it's
just a straight memcpy ala skb_copy_to_linear_data()
so just get rid of it.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/ppc/8260_io/enet.c b/arch/ppc/8260_io/enet.c
index 4c0a7d7..615b658 100644
--- a/arch/ppc/8260_io/enet.c
+++ b/arch/ppc/8260_io/enet.c
@@ -477,9 +477,9 @@ for (;;) {
 		}
 		else {
 			skb_put(skb,pkt_len-4);	/* Make room */
-			eth_copy_and_sum(skb,
+			skb_copy_to_linear_data(skb,
 				(unsigned char *)__va(bdp->cbd_bufaddr),
-				pkt_len-4, 0);
+				pkt_len-4);
 			skb->protocol=eth_type_trans(skb,dev);
 			netif_rx(skb);
 		}
diff --git a/arch/ppc/8260_io/fcc_enet.c b/arch/ppc/8260_io/fcc_enet.c
index cab395d..6f3ed6a 100644
--- a/arch/ppc/8260_io/fcc_enet.c
+++ b/arch/ppc/8260_io/fcc_enet.c
@@ -734,9 +734,9 @@ for (;;) {
 		}
 		else {
 			skb_put(skb,pkt_len);	/* Make room */
-			eth_copy_and_sum(skb,
+			skb_copy_to_linear_data(skb,
 				(unsigned char *)__va(bdp->cbd_bufaddr),
-				pkt_len, 0);
+				pkt_len);
 			skb->protocol=eth_type_trans(skb,dev);
 			netif_rx(skb);
 		}
diff --git a/arch/ppc/8xx_io/enet.c b/arch/ppc/8xx_io/enet.c
index e58288e..703d47e 100644
--- a/arch/ppc/8xx_io/enet.c
+++ b/arch/ppc/8xx_io/enet.c
@@ -506,9 +506,9 @@ for (;;) {
 		}
 		else {
 			skb_put(skb,pkt_len-4);	/* Make room */
-			eth_copy_and_sum(skb,
+			skb_copy_to_linear_data(skb,
 				cep->rx_vaddr[bdp - cep->rx_bd_base],
-				pkt_len-4, 0);
+				pkt_len-4);
 			skb->protocol=eth_type_trans(skb,dev);
 			netif_rx(skb);
 		}
diff --git a/arch/ppc/8xx_io/fec.c b/arch/ppc/8xx_io/fec.c
index d38335d..0288279 100644
--- a/arch/ppc/8xx_io/fec.c
+++ b/arch/ppc/8xx_io/fec.c
@@ -725,7 +725,7 @@ while (!(bdp->cbd_sc & BD_ENET_RX_EMPTY)) {
 		fep->stats.rx_dropped++;
 	} else {
 		skb_put(skb,pkt_len-4);	/* Make room */
-		eth_copy_and_sum(skb, data, pkt_len-4, 0);
+		skb_copy_to_linear_data(skb, data, pkt_len-4);
 		skb->protocol=eth_type_trans(skb,dev);
 		netif_rx(skb);
 	}
diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index da1a22c..ab18343 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -990,7 +990,7 @@ static void elmc_rcv_int(struct net_device *dev)
 				if (skb != NULL) {
 					skb_reserve(skb, 2);	/* 16 byte alignment */
 					skb_put(skb,totlen);
-					eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0);
+					skb_copy_to_linear_data(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen);
 					skb->protocol = eth_type_trans(skb, dev);
 					netif_rx(skb);
 					dev->last_rx = jiffies;
diff --git a/drivers/net/7990.c b/drivers/net/7990.c
index 0877fc3..e89ace1 100644
--- a/drivers/net/7990.c
+++ b/drivers/net/7990.c
@@ -333,9 +333,9 @@ static int lance_rx (struct net_device *dev)
 
                         skb_reserve (skb, 2);           /* 16 byte align */
                         skb_put (skb, len);             /* make room */
-                        eth_copy_and_sum(skb,
+                        skb_copy_to_linear_data(skb,
                                          (unsigned char *)&(ib->rx_buf [lp->rx_new][0]),
-                                         len, 0);
+                                         len);
                         skb->protocol = eth_type_trans (skb, dev);
 			netif_rx (skb);
 			dev->last_rx = jiffies;
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index a844b1f..21a6ccb 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -2017,7 +2017,7 @@ no_early_rx:
 #if RX_BUF_IDX == 3
 			wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
 #else
-			eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
+			skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size);
 #endif
 			skb_put (skb, pkt_size);
 
diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index 81d5a37..a45de69 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c
@@ -322,9 +322,9 @@ static int lance_rx (struct net_device *dev)
 
 			skb_reserve (skb, 2);		/* 16 byte align */
 			skb_put (skb, len);		/* make room */
-			eth_copy_and_sum(skb,
+			skb_copy_to_linear_data(skb,
 					 (unsigned char *)&(ib->rx_buf [lp->rx_new][0]),
-					 len, 0);
+					 len);
 			skb->protocol = eth_type_trans (skb, dev);
 			netif_rx (skb);
 			dev->last_rx = jiffies;
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index a241ae7..bc5a38a 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -746,7 +746,7 @@ static int ariadne_rx(struct net_device *dev)
 
 	    skb_reserve(skb,2);		/* 16 byte align */
 	    skb_put(skb,pkt_len);	/* Make room */
-	    eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0);
+	    skb_copy_to_linear_data(skb, (char *)priv->rx_buff[entry], pkt_len);
 	    skb->protocol=eth_type_trans(skb,dev);
 #if 0
 	    printk(KERN_DEBUG "RX pkt type 0x%04x from ",
diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index 2438c5b..f6ece1d 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -258,7 +258,7 @@ static int ep93xx_rx(struct net_device *dev, int *budget)
 			skb_reserve(skb, 2);
 			dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr,
 						length, DMA_FROM_DEVICE);
-			eth_copy_and_sum(skb, ep->rx_buf[entry], length, 0);
+			skb_copy_to_linear_data(skb, ep->rx_buf[entry], length);
 			skb_put(skb, length);
 			skb->protocol = eth_type_trans(skb, dev);
 
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index c27cfce..e86b369 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -1205,8 +1205,8 @@ static int au1000_rx(struct net_device *dev)
 				continue;
 			}
 			skb_reserve(skb, 2);	/* 16 byte IP header align */
-			eth_copy_and_sum(skb,
-				(unsigned char *)pDB->vaddr, frmlen, 0);
+			skb_copy_to_linear_data(skb,
+				(unsigned char *)pDB->vaddr, frmlen);
 			skb_put(skb, frmlen);
 			skb->protocol = eth_type_trans(skb, dev);
 			netif_rx(skb);	/* pass the packet to upper layers */
diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c
index 74ec64a..a4ace07 100644
--- a/drivers/net/dl2k.c
+++ b/drivers/net/dl2k.c
@@ -866,9 +866,9 @@ receive_packet (struct net_device *dev)
 							    PCI_DMA_FROMDEVICE);
 				/* 16 byte align the IP header */
 				skb_reserve (skb, 2);
-				eth_copy_and_sum (skb,
+				skb_copy_to_linear_data (skb,
 						  np->rx_skbuff[entry]->data,
-						  pkt_len, 0);
+						  pkt_len);
 				skb_put (skb, pkt_len);
 				pci_dma_sync_single_for_device(np->pdev,
 				  			       desc->fraginfo &
diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c
index 98003419..9afa47e 100644
--- a/drivers/net/eepro100.c
+++ b/drivers/net/eepro100.c
@@ -1801,7 +1801,7 @@ speedo_rx(struct net_device *dev)
 
 #if 1 || USE_IP_CSUM
 				/* Packet is in one chunk -- we can copy + cksum. */
-				eth_copy_and_sum(skb, sp->rx_skbuff[entry]->data, pkt_len, 0);
+				skb_copy_to_linear_data(skb, sp->rx_skbuff[entry]->data, pkt_len);
 				skb_put(skb, pkt_len);
 #else
 				skb_copy_from_linear_data(sp->rx_skbuff[entry],
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index 5e51794..1197784 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -1201,7 +1201,7 @@ static int epic_rx(struct net_device *dev, int budget)
 							    ep->rx_ring[entry].bufaddr,
 							    ep->rx_buf_sz,
 							    PCI_DMA_FROMDEVICE);
-				eth_copy_and_sum(skb, ep->rx_skbuff[entry]->data, pkt_len, 0);
+				skb_copy_to_linear_data(skb, ep->rx_skbuff[entry]->data, pkt_len);
 				skb_put(skb, pkt_len);
 				pci_dma_sync_single_for_device(ep->pci_dev,
 							       ep->rx_ring[entry].bufaddr,
diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c
index abe9b08..ff9f177 100644
--- a/drivers/net/fealnx.c
+++ b/drivers/net/fealnx.c
@@ -1727,8 +1727,8 @@ static int netdev_rx(struct net_device *dev)
 				/* Call copy + cksum if available. */
 
 #if ! defined(__alpha__)
-				eth_copy_and_sum(skb,
-					np->cur_rx->skbuff->data, pkt_len, 0);
+				skb_copy_to_linear_data(skb,
+					np->cur_rx->skbuff->data, pkt_len);
 				skb_put(skb, pkt_len);
 #else
 				memcpy(skb_put(skb, pkt_len),
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 255b091..03023dd 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -648,7 +648,7 @@ while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
 		fep->stats.rx_dropped++;
 	} else {
 		skb_put(skb,pkt_len-4);	/* Make room */
-		eth_copy_and_sum(skb, data, pkt_len-4, 0);
+		skb_copy_to_linear_data(skb, data, pkt_len-4);
 		skb->protocol=eth_type_trans(skb,dev);
 		netif_rx(skb);
 	}
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index 2521b11..15254dc 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1575,8 +1575,8 @@ static int hamachi_rx(struct net_device *dev)
 							    PCI_DMA_FROMDEVICE);
 				/* Call copy + cksum if available. */
 #if 1 || USE_IP_COPYSUM
-				eth_copy_and_sum(skb,
-					hmp->rx_skbuff[entry]->data, pkt_len, 0);
+				skb_copy_to_linear_data(skb,
+					hmp->rx_skbuff[entry]->data, pkt_len);
 				skb_put(skb, pkt_len);
 #else
 				memcpy(skb_put(skb, pkt_len), hmp->rx_ring_dma
diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c
index d5f694f..d9ce1ae 100644
--- a/drivers/net/ixp2000/ixpdev.c
+++ b/drivers/net/ixp2000/ixpdev.c
@@ -111,7 +111,7 @@ static int ixpdev_rx(struct net_device *dev, int *budget)
 		skb = dev_alloc_skb(desc->pkt_length + 2);
 		if (likely(skb != NULL)) {
 			skb_reserve(skb, 2);
-			eth_copy_and_sum(skb, buf, desc->pkt_length, 0);
+			skb_copy_to_linear_data(skb, buf, desc->pkt_length);
 			skb_put(skb, desc->pkt_length);
 			skb->protocol = eth_type_trans(skb, nds[desc->channel]);
 
diff --git a/drivers/net/lance.c b/drivers/net/lance.c
index 0fe96c8..a2f37e5 100644
--- a/drivers/net/lance.c
+++ b/drivers/net/lance.c
@@ -1186,9 +1186,9 @@ lance_rx(struct net_device *dev)
 				}
 				skb_reserve(skb,2);	/* 16 byte align */
 				skb_put(skb,pkt_len);	/* Make room */
-				eth_copy_and_sum(skb,
+				skb_copy_to_linear_data(skb,
 					(unsigned char *)isa_bus_to_virt((lp->rx_ring[entry].base & 0x00ffffff)),
-					pkt_len,0);
+					pkt_len);
 				skb->protocol=eth_type_trans(skb,dev);
 				netif_rx(skb);
 				dev->last_rx = jiffies;
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index 460a0871..3450051 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -2357,8 +2357,8 @@ static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do)
 					np->rx_dma[entry],
 					buflen,
 					PCI_DMA_FROMDEVICE);
-				eth_copy_and_sum(skb,
-					np->rx_skbuff[entry]->data, pkt_len, 0);
+				skb_copy_to_linear_data(skb,
+					np->rx_skbuff[entry]->data, pkt_len);
 				skb_put(skb, pkt_len);
 				pci_dma_sync_single_for_device(np->pci_dev,
 					np->rx_dma[entry],
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index 8dbd6d1..5e7999d 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -936,7 +936,7 @@ static void ni52_rcv_int(struct net_device *dev)
 					{
 						skb_reserve(skb,2);
 						skb_put(skb,totlen);
-						eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0);
+						skb_copy_to_linear_data(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen);
 						skb->protocol=eth_type_trans(skb,dev);
 						netif_rx(skb);
 						dev->last_rx = jiffies;
diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c
index 3818edf..4ef5fe3 100644
--- a/drivers/net/ni65.c
+++ b/drivers/net/ni65.c
@@ -1096,7 +1096,7 @@ static void ni65_recv_intr(struct net_device *dev,int csr0)
 #ifdef RCV_VIA_SKB
 				if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) {
 					skb_put(skb,len);
-					eth_copy_and_sum(skb, (unsigned char *)(p->recv_skb[p->rmdnum]->data),len,0);
+					skb_copy_to_linear_data(skb, (unsigned char *)(p->recv_skb[p->rmdnum]->data),len);
 				}
 				else {
 					struct sk_buff *skb1 = p->recv_skb[p->rmdnum];
@@ -1108,7 +1108,7 @@ static void ni65_recv_intr(struct net_device *dev,int csr0)
 				}
 #else
 				skb_put(skb,len);
-				eth_copy_and_sum(skb, (unsigned char *) p->recvbounce[p->rmdnum],len,0);
+				skb_copy_to_linear_data(skb, (unsigned char *) p->recvbounce[p->rmdnum],len);
 #endif
 				p->stats.rx_packets++;
 				p->stats.rx_bytes += len;
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c
index df8998b..3cdbe118 100644
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -1567,7 +1567,7 @@ static void netdrv_rx_interrupt (struct net_device *dev,
 		if (skb) {
 			skb_reserve (skb, 2);	/* 16 byte align the IP fields. */
 
-			eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
+			skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size);
 			skb_put (skb, pkt_size);
 
 			skb->protocol = eth_type_trans (skb, dev);
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 9c171a7..465485a 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -1235,9 +1235,9 @@ static void pcnet32_rx_entry(struct net_device *dev,
 					    lp->rx_dma_addr[entry],
 					    pkt_len,
 					    PCI_DMA_FROMDEVICE);
-		eth_copy_and_sum(skb,
+		skb_copy_to_linear_data(skb,
 				 (unsigned char *)(lp->rx_skbuff[entry]->data),
-				 pkt_len, 0);
+				 pkt_len);
 		pci_dma_sync_single_for_device(lp->pci_dev,
 					       lp->rx_dma_addr[entry],
 					       pkt_len,
diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c
index ad94358..451486b 100644
--- a/drivers/net/saa9730.c
+++ b/drivers/net/saa9730.c
@@ -690,9 +690,9 @@ static int lan_saa9730_rx(struct net_device *dev)
 				lp->stats.rx_packets++;
 				skb_reserve(skb, 2);	/* 16 byte align */
 				skb_put(skb, len);	/* make room */
-				eth_copy_and_sum(skb,
+				skb_copy_to_linear_data(skb,
 						 (unsigned char *) pData,
-						 len, 0);
+						 len);
 				skb->protocol = eth_type_trans(skb, dev);
 				netif_rx(skb);
 				dev->last_rx = jiffies;
diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c
index 2106bec..384b468 100644
--- a/drivers/net/sgiseeq.c
+++ b/drivers/net/sgiseeq.c
@@ -320,7 +320,7 @@ static inline void sgiseeq_rx(struct net_device *dev, struct sgiseeq_private *sp
 				skb_put(skb, len);
 
 				/* Copy out of kseg1 to avoid silly cache flush. */
-				eth_copy_and_sum(skb, pkt_pointer + 2, len, 0);
+				skb_copy_to_linear_data(skb, pkt_pointer + 2, len);
 				skb->protocol = eth_type_trans(skb, dev);
 
 				/* We don't want to receive our own packets */
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index bc8de48..ec2ad9f 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -548,7 +548,7 @@ static inline int sis190_try_rx_copy(struct sk_buff **sk_buff, int pkt_size,
 		skb = dev_alloc_skb(pkt_size + NET_IP_ALIGN);
 		if (skb) {
 			skb_reserve(skb, NET_IP_ALIGN);
-			eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0);
+			skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size);
 			*sk_buff = skb;
 			sis190_give_to_asic(desc, rx_buf_sz);
 			ret = 0;
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index 786d4b9..f2e1019 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1456,7 +1456,7 @@ static int __netdev_rx(struct net_device *dev, int *quota)
 			pci_dma_sync_single_for_cpu(np->pci_dev,
 						    np->rx_info[entry].mapping,
 						    pkt_len, PCI_DMA_FROMDEVICE);
-			eth_copy_and_sum(skb, np->rx_info[entry].skb->data, pkt_len, 0);
+			skb_copy_to_linear_data(skb, np->rx_info[entry].skb->data, pkt_len);
 			pci_dma_sync_single_for_device(np->pci_dev,
 						       np->rx_info[entry].mapping,
 						       pkt_len, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c
index a123ea8..b77ab6e 100644
--- a/drivers/net/sun3_82586.c
+++ b/drivers/net/sun3_82586.c
@@ -777,7 +777,7 @@ static void sun3_82586_rcv_int(struct net_device *dev)
 					{
 						skb_reserve(skb,2);
 						skb_put(skb,totlen);
-						eth_copy_and_sum(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen,0);
+						skb_copy_to_linear_data(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen);
 						skb->protocol=eth_type_trans(skb,dev);
 						netif_rx(skb);
 						p->stats.rx_packets++;
diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c
index 791e081..f1548c0 100644
--- a/drivers/net/sun3lance.c
+++ b/drivers/net/sun3lance.c
@@ -853,10 +853,9 @@ static int lance_rx( struct net_device *dev )
 
 				skb_reserve( skb, 2 );	/* 16 byte align */
 				skb_put( skb, pkt_len );	/* Make room */
-//			        skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), pkt_len);
-				eth_copy_and_sum(skb,
+				skb_copy_to_linear_data(skb,
 						 PKTBUF_ADDR(head),
-						 pkt_len, 0);
+						 pkt_len);
 
 				skb->protocol = eth_type_trans( skb, dev );
 				netif_rx( skb );
diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index 2ad8d58..b3e0158 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -860,7 +860,7 @@ static void bigmac_rx(struct bigmac *bp)
 			sbus_dma_sync_single_for_cpu(bp->bigmac_sdev,
 						     this->rx_addr, len,
 						     SBUS_DMA_FROMDEVICE);
-			eth_copy_and_sum(copy_skb, (unsigned char *)skb->data, len, 0);
+			skb_copy_to_linear_data(copy_skb, (unsigned char *)skb->data, len);
 			sbus_dma_sync_single_for_device(bp->bigmac_sdev,
 							this->rx_addr, len,
 							SBUS_DMA_FROMDEVICE);
diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c
index e1f912d..c8ba534 100644
--- a/drivers/net/sundance.c
+++ b/drivers/net/sundance.c
@@ -1313,7 +1313,7 @@ static void rx_poll(unsigned long data)
 							    np->rx_buf_sz,
 							    PCI_DMA_FROMDEVICE);
 
-				eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0);
+				skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len);
 				pci_dma_sync_single_for_device(np->pci_dev,
 							       desc->frag[0].addr,
 							       np->rx_buf_sz,
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index 4272253..053b7cb 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -549,9 +549,9 @@ static void lance_rx_dvma(struct net_device *dev)
 
 			skb_reserve(skb, 2);		/* 16 byte align */
 			skb_put(skb, len);		/* make room */
-			eth_copy_and_sum(skb,
+			skb_copy_to_linear_data(skb,
 					 (unsigned char *)&(ib->rx_buf [entry][0]),
-					 len, 0);
+					 len);
 			skb->protocol = eth_type_trans(skb, dev);
 			netif_rx(skb);
 			dev->last_rx = jiffies;
diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c
index fa70e0b..1b65ae8 100644
--- a/drivers/net/sunqe.c
+++ b/drivers/net/sunqe.c
@@ -439,8 +439,8 @@ static void qe_rx(struct sunqe *qep)
 			} else {
 				skb_reserve(skb, 2);
 				skb_put(skb, len);
-				eth_copy_and_sum(skb, (unsigned char *) this_qbuf,
-						 len, 0);
+				skb_copy_to_linear_data(skb, (unsigned char *) this_qbuf,
+						 len);
 				skb->protocol = eth_type_trans(skb, qep->dev);
 				netif_rx(skb);
 				qep->dev->last_rx = jiffies;
diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c
index ea89677..53efd66 100644
--- a/drivers/net/tulip/interrupt.c
+++ b/drivers/net/tulip/interrupt.c
@@ -197,8 +197,8 @@ int tulip_poll(struct net_device *dev, int *budget)
 								   tp->rx_buffers[entry].mapping,
 								   pkt_len, PCI_DMA_FROMDEVICE);
 #if ! defined(__alpha__)
-                                       eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data,
-                                                        pkt_len, 0);
+                                       skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data,
+                                                        pkt_len);
                                        skb_put(skb, pkt_len);
 #else
                                        memcpy(skb_put(skb, pkt_len),
@@ -420,8 +420,8 @@ static int tulip_rx(struct net_device *dev)
 							    tp->rx_buffers[entry].mapping,
 							    pkt_len, PCI_DMA_FROMDEVICE);
 #if ! defined(__alpha__)
-				eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data,
-						 pkt_len, 0);
+				skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data,
+						 pkt_len);
 				skb_put(skb, pkt_len);
 #else
 				memcpy(skb_put(skb, pkt_len),
diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c
index 38f3b99..5824f6a 100644
--- a/drivers/net/tulip/winbond-840.c
+++ b/drivers/net/tulip/winbond-840.c
@@ -1232,7 +1232,7 @@ static int netdev_rx(struct net_device *dev)
 				pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry],
 							    np->rx_skbuff[entry]->len,
 							    PCI_DMA_FROMDEVICE);
-				eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0);
+				skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len);
 				skb_put(skb, pkt_len);
 				pci_dma_sync_single_for_device(np->pci_dev,np->rx_addr[entry],
 							       np->rx_skbuff[entry]->len,
diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c
index 2470b1e..37e35cd 100644
--- a/drivers/net/tulip/xircom_cb.c
+++ b/drivers/net/tulip/xircom_cb.c
@@ -1208,7 +1208,7 @@ static void investigate_read_descriptor(struct net_device *dev,struct xircom_pri
 				goto out;
 			}
 			skb_reserve(skb, 2);
-			eth_copy_and_sum(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len, 0);
+			skb_copy_to_linear_data(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len);
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, dev);
 			netif_rx(skb);
diff --git a/drivers/net/tulip/xircom_tulip_cb.c b/drivers/net/tulip/xircom_tulip_cb.c
index f641729..f984fbd 100644
--- a/drivers/net/tulip/xircom_tulip_cb.c
+++ b/drivers/net/tulip/xircom_tulip_cb.c
@@ -1242,8 +1242,8 @@ xircom_rx(struct net_device *dev)
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 #if ! defined(__alpha__)
-				eth_copy_and_sum(skb, bus_to_virt(tp->rx_ring[entry].buffer1),
-								 pkt_len, 0);
+				skb_copy_to_linear_data(skb, bus_to_virt(tp->rx_ring[entry].buffer1),
+								 pkt_len);
 				skb_put(skb, pkt_len);
 #else
 				memcpy(skb_put(skb, pkt_len),
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 15b2fb8..df52454 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -1703,7 +1703,7 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile u32 * ready,
 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr,
 						    PKT_BUF_SZ,
 						    PCI_DMA_FROMDEVICE);
-			eth_copy_and_sum(new_skb, skb->data, pkt_len, 0);
+			skb_copy_to_linear_data(new_skb, skb->data, pkt_len);
 			pci_dma_sync_single_for_device(tp->pdev, dma_addr,
 						       PKT_BUF_SZ,
 						       PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 86e90c5..76752d84 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -255,7 +255,7 @@ static void catc_rx_done(struct urb *urb)
 		if (!(skb = dev_alloc_skb(pkt_len)))
 			return;
 
-		eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0);
+		skb_copy_to_linear_data(skb, pkt_start + pkt_offset, pkt_len);
 		skb_put(skb, pkt_len);
 
 		skb->protocol = eth_type_trans(skb, catc->netdev);
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index 60d2944..524dc5f 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -635,7 +635,7 @@ static void kaweth_usb_receive(struct urb *urb)
 
 		skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
 
-		eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0);
+		skb_copy_to_linear_data(skb, kaweth->rx_buf + 2, pkt_len);
 
 		skb_put(skb, pkt_len);
 
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index adea290..565f6cc 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -1492,9 +1492,9 @@ static int rhine_rx(struct net_device *dev, int limit)
 							    rp->rx_buf_sz,
 							    PCI_DMA_FROMDEVICE);
 
-				eth_copy_and_sum(skb,
+				skb_copy_to_linear_data(skb,
 						 rp->rx_skbuff[entry]->data,
-						 pkt_len, 0);
+						 pkt_len);
 				skb_put(skb, pkt_len);
 				pci_dma_sync_single_for_device(rp->pdev,
 							       rp->rx_skbuff_dma[entry],
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index ce9230b..c8b5c22 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -1011,7 +1011,7 @@ static inline void wl3501_md_ind_interrupt(struct net_device *dev,
 	} else {
 		skb->dev = dev;
 		skb_reserve(skb, 2); /* IP headers on 16 bytes boundaries */
-		eth_copy_and_sum(skb, (unsigned char *)&sig.daddr, 12, 0);
+		skb_copy_to_linear_data(skb, (unsigned char *)&sig.daddr, 12);
 		wl3501_receive(this, skb->data, pkt_len);
 		skb_put(skb, pkt_len);
 		skb->protocol	= eth_type_trans(skb, dev);
diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c
index f2a90a7..870c539 100644
--- a/drivers/net/yellowfin.c
+++ b/drivers/net/yellowfin.c
@@ -1137,7 +1137,7 @@ static int yellowfin_rx(struct net_device *dev)
 				if (skb == NULL)
 					break;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
-				eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0);
+				skb_copy_to_linear_data(skb, rx_skb->data, pkt_len);
 				skb_put(skb, pkt_len);
 				pci_dma_sync_single_for_device(yp->pci_dev, desc->addr,
 											   yp->rx_buf_sz,
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 071c67a..f48eb89 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -40,12 +40,6 @@ extern int		eth_header_cache(struct neighbour *neigh,
 					 struct hh_cache *hh);
 
 extern struct net_device *alloc_etherdev(int sizeof_priv);
-static inline void eth_copy_and_sum (struct sk_buff *dest, 
-				     const unsigned char *src, 
-				     int len, int base)
-{
-	memcpy (dest->data, src, len);
-}
 
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
-- 
cgit v0.10.2


From b3d88ad49a0623d09efcf998beb26288c8029f75 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Sun, 10 Jun 2007 17:57:33 -0700
Subject: [MAC80211]: Add support for SIOCGIWRATE ioctl

At present, transmission rate information for mac80211 is available only
if verbose debugging is turned on, and then only in the logs. This patch
implements the SIOCGIWRATE ioctl, which adds the current transmission rate to
the output of iwconfig.

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index 352f03b..66e8a97 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -838,6 +838,29 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev,
 }
 
 
+static int ieee80211_ioctl_giwrate(struct net_device *dev,
+				  struct iw_request_info *info,
+				  struct iw_param *rate, char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+	struct ieee80211_sub_if_data *sdata;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->type == IEEE80211_IF_TYPE_STA)
+		sta = sta_info_get(local, sdata->u.sta.bssid);
+	else
+		return -EOPNOTSUPP;
+	if (!sta)
+		return -ENODEV;
+	if (sta->txrate < local->oper_hw_mode->num_rates)
+		rate->value = local->oper_hw_mode->rates[sta->txrate].rate * 100000;
+	else
+		rate->value = 0;
+	sta_info_put(sta);
+	return 0;
+}
+
 static int ieee80211_ioctl_siwrts(struct net_device *dev,
 				  struct iw_request_info *info,
 				  struct iw_param *rts, char *extra)
@@ -1779,7 +1802,7 @@ static const iw_handler ieee80211_handler[] =
 	(iw_handler) NULL,				/* -- hole -- */
 	(iw_handler) NULL,				/* -- hole -- */
 	(iw_handler) NULL,				/* SIOCSIWRATE */
-	(iw_handler) NULL,				/* SIOCGIWRATE */
+	(iw_handler) ieee80211_ioctl_giwrate,		/* SIOCGIWRATE */
 	(iw_handler) ieee80211_ioctl_siwrts,		/* SIOCSIWRTS */
 	(iw_handler) ieee80211_ioctl_giwrts,		/* SIOCGIWRTS */
 	(iw_handler) ieee80211_ioctl_siwfrag,		/* SIOCSIWFRAG */
-- 
cgit v0.10.2


From 6472ce6096bf27d85a1f2580964a36f290bd60a9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:03:21 -0700
Subject: [NET]: Mark struct net_device * argument to netdev_priv const

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3a70f55..94cc77c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -546,7 +546,7 @@ struct net_device
 #define	NETDEV_ALIGN		32
 #define	NETDEV_ALIGN_CONST	(NETDEV_ALIGN - 1)
 
-static inline void *netdev_priv(struct net_device *dev)
+static inline void *netdev_priv(const struct net_device *dev)
 {
 	return (char *)dev + ((sizeof(struct net_device)
 					+ NETDEV_ALIGN_CONST)
-- 
cgit v0.10.2


From 0157f60c0caea24fa8347f4c0ed53297c412fce1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:03:36 -0700
Subject: [RTNETLINK]: Split up rtnl_setlink

Split up rtnl_setlink into a function performing validation and a function
performing the actual changes. This allows the modification logic to be
shared with rtnl_newlink, which is introduced by the next patch.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02e8bf0..25ca219 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -561,44 +561,11 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKMODE]		= { .type = NLA_U8 },
 };
 
-static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+		      struct nlattr **tb, char *ifname)
 {
-	struct ifinfomsg *ifm;
-	struct net_device *dev;
-	int err, send_addr_notify = 0, modified = 0;
-	struct nlattr *tb[IFLA_MAX+1];
-	char ifname[IFNAMSIZ];
-
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
-	if (err < 0)
-		goto errout;
-
-	if (tb[IFLA_IFNAME])
-		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
-	else
-		ifname[0] = '\0';
-
-	err = -EINVAL;
-	ifm = nlmsg_data(nlh);
-	if (ifm->ifi_index > 0)
-		dev = dev_get_by_index(ifm->ifi_index);
-	else if (tb[IFLA_IFNAME])
-		dev = dev_get_by_name(ifname);
-	else
-		goto errout;
-
-	if (dev == NULL) {
-		err = -ENODEV;
-		goto errout;
-	}
-
-	if (tb[IFLA_ADDRESS] &&
-	    nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
-		goto errout_dev;
-
-	if (tb[IFLA_BROADCAST] &&
-	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
-		goto errout_dev;
+	int modified = 0, send_addr_notify = 0;
+	int err;
 
 	if (tb[IFLA_MAP]) {
 		struct rtnl_link_ifmap *u_map;
@@ -606,12 +573,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 		if (!dev->set_config) {
 			err = -EOPNOTSUPP;
-			goto errout_dev;
+			goto errout;
 		}
 
 		if (!netif_device_present(dev)) {
 			err = -ENODEV;
-			goto errout_dev;
+			goto errout;
 		}
 
 		u_map = nla_data(tb[IFLA_MAP]);
@@ -624,7 +591,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 		err = dev->set_config(dev, &k_map);
 		if (err < 0)
-			goto errout_dev;
+			goto errout;
 
 		modified = 1;
 	}
@@ -635,19 +602,19 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 		if (!dev->set_mac_address) {
 			err = -EOPNOTSUPP;
-			goto errout_dev;
+			goto errout;
 		}
 
 		if (!netif_device_present(dev)) {
 			err = -ENODEV;
-			goto errout_dev;
+			goto errout;
 		}
 
 		len = sizeof(sa_family_t) + dev->addr_len;
 		sa = kmalloc(len, GFP_KERNEL);
 		if (!sa) {
 			err = -ENOMEM;
-			goto errout_dev;
+			goto errout;
 		}
 		sa->sa_family = dev->type;
 		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
@@ -655,7 +622,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		err = dev->set_mac_address(dev, sa);
 		kfree(sa);
 		if (err)
-			goto errout_dev;
+			goto errout;
 		send_addr_notify = 1;
 		modified = 1;
 	}
@@ -663,7 +630,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (tb[IFLA_MTU]) {
 		err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
 		if (err < 0)
-			goto errout_dev;
+			goto errout;
 		modified = 1;
 	}
 
@@ -675,7 +642,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (ifm->ifi_index > 0 && ifname[0]) {
 		err = dev_change_name(dev, ifname);
 		if (err < 0)
-			goto errout_dev;
+			goto errout;
 		modified = 1;
 	}
 
@@ -684,7 +651,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		send_addr_notify = 1;
 	}
 
-
 	if (ifm->ifi_flags || ifm->ifi_change) {
 		unsigned int flags = ifm->ifi_flags;
 
@@ -712,7 +678,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	err = 0;
 
-errout_dev:
+errout:
 	if (err < 0 && modified && net_ratelimit())
 		printk(KERN_WARNING "A link change request failed with "
 		       "some changes comitted already. Interface %s may "
@@ -721,7 +687,50 @@ errout_dev:
 
 	if (send_addr_notify)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
 
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct ifinfomsg *ifm;
+	struct net_device *dev;
+	int err;
+	struct nlattr *tb[IFLA_MAX+1];
+	char ifname[IFNAMSIZ];
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		goto errout;
+
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+	else
+		ifname[0] = '\0';
+
+	err = -EINVAL;
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index > 0)
+		dev = dev_get_by_index(ifm->ifi_index);
+	else if (tb[IFLA_IFNAME])
+		dev = dev_get_by_name(ifname);
+	else
+		goto errout;
+
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
+
+	if (tb[IFLA_ADDRESS] &&
+	    nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+		goto errout_dev;
+
+	if (tb[IFLA_BROADCAST] &&
+	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+		goto errout_dev;
+
+	err = do_setlink(dev, ifm, tb, ifname);
+errout_dev:
 	dev_put(dev);
 errout:
 	return err;
-- 
cgit v0.10.2


From 38f7b870d4a6a5d3ec21557e849620cb7d032965 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:03:51 -0700
Subject: [RTNETLINK]: Link creation API

Add rtnetlink API for creating, changing and deleting software devices.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 604c243..3144bab 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -76,6 +76,8 @@ enum
 #define IFLA_WEIGHT IFLA_WEIGHT
 	IFLA_OPERSTATE,
 	IFLA_LINKMODE,
+	IFLA_LINKINFO,
+#define IFLA_LINKINFO IFLA_LINKINFO
 	__IFLA_MAX
 };
 
@@ -140,4 +142,15 @@ struct ifla_cacheinfo
 	__u32	retrans_time;
 };
 
+enum
+{
+	IFLA_INFO_UNSPEC,
+	IFLA_INFO_KIND,
+	IFLA_INFO_DATA,
+	IFLA_INFO_XSTATS,
+	__IFLA_INFO_MAX,
+};
+
+#define IFLA_INFO_MAX	(__IFLA_INFO_MAX - 1)
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 94cc77c..e7913ee 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -540,6 +540,9 @@ struct net_device
 	struct device		dev;
 	/* space for optional statistics and wireless sysfs groups */
 	struct attribute_group  *sysfs_groups[3];
+
+	/* rtnetlink link ops */
+	const struct rtnl_link_ops *rtnl_link_ops;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 3b3d474..3861c05 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -22,4 +22,62 @@ static inline int rtnl_msg_family(struct nlmsghdr *nlh)
 		return AF_UNSPEC;
 }
 
+/**
+ *	struct rtnl_link_ops - rtnetlink link operations
+ *
+ *	@list: Used internally
+ *	@kind: Identifier
+ *	@maxtype: Highest device specific netlink attribute number
+ *	@policy: Netlink policy for device specific attribute validation
+ *	@validate: Optional validation function for netlink/changelink parameters
+ *	@priv_size: sizeof net_device private space
+ *	@setup: net_device setup function
+ *	@newlink: Function for configuring and registering a new device
+ *	@changelink: Function for changing parameters of an existing device
+ *	@dellink: Function to remove a device
+ *	@get_size: Function to calculate required room for dumping device
+ *		   specific netlink attributes
+ *	@fill_info: Function to dump device specific netlink attributes
+ *	@get_xstats_size: Function to calculate required room for dumping device
+ *			  specific statistics
+ *	@fill_xstats: Function to dump device specific statistics
+ */
+struct rtnl_link_ops {
+	struct list_head	list;
+
+	const char		*kind;
+
+	size_t			priv_size;
+	void			(*setup)(struct net_device *dev);
+
+	int			maxtype;
+	const struct nla_policy	*policy;
+	int			(*validate)(struct nlattr *tb[],
+					    struct nlattr *data[]);
+
+	int			(*newlink)(struct net_device *dev,
+					   struct nlattr *tb[],
+					   struct nlattr *data[]);
+	int			(*changelink)(struct net_device *dev,
+					      struct nlattr *tb[],
+					      struct nlattr *data[]);
+	void			(*dellink)(struct net_device *dev);
+
+	size_t			(*get_size)(const struct net_device *dev);
+	int			(*fill_info)(struct sk_buff *skb,
+					     const struct net_device *dev);
+
+	size_t			(*get_xstats_size)(const struct net_device *dev);
+	int			(*fill_xstats)(struct sk_buff *skb,
+					       const struct net_device *dev);
+};
+
+extern int	__rtnl_link_register(struct rtnl_link_ops *ops);
+extern void	__rtnl_link_unregister(struct rtnl_link_ops *ops);
+
+extern int	rtnl_link_register(struct rtnl_link_ops *ops);
+extern void	rtnl_link_unregister(struct rtnl_link_ops *ops);
+
+#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
+
 #endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 25ca219..06c0c5a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -243,6 +243,143 @@ void rtnl_unregister_all(int protocol)
 
 EXPORT_SYMBOL_GPL(rtnl_unregister_all);
 
+static LIST_HEAD(link_ops);
+
+/**
+ * __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
+ * @ops: struct rtnl_link_ops * to register
+ *
+ * The caller must hold the rtnl_mutex. This function should be used
+ * by drivers that create devices during module initialization. It
+ * must be called before registering the devices.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_link_register(struct rtnl_link_ops *ops)
+{
+	list_add_tail(&ops->list, &link_ops);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(__rtnl_link_register);
+
+/**
+ * rtnl_link_register - Register rtnl_link_ops with rtnetlink.
+ * @ops: struct rtnl_link_ops * to register
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_link_register(struct rtnl_link_ops *ops)
+{
+	int err;
+
+	rtnl_lock();
+	err = __rtnl_link_register(ops);
+	rtnl_unlock();
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_link_register);
+
+/**
+ * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
+ * @ops: struct rtnl_link_ops * to unregister
+ *
+ * The caller must hold the rtnl_mutex. This function should be used
+ * by drivers that unregister devices during module unloading. It must
+ * be called after unregistering the devices.
+ */
+void __rtnl_link_unregister(struct rtnl_link_ops *ops)
+{
+	list_del(&ops->list);
+}
+
+EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
+
+/**
+ * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
+ * @ops: struct rtnl_link_ops * to unregister
+ */
+void rtnl_link_unregister(struct rtnl_link_ops *ops)
+{
+	rtnl_lock();
+	__rtnl_link_unregister(ops);
+	rtnl_unlock();
+}
+
+EXPORT_SYMBOL_GPL(rtnl_link_unregister);
+
+static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
+{
+	const struct rtnl_link_ops *ops;
+
+	list_for_each_entry(ops, &link_ops, list) {
+		if (!strcmp(ops->kind, kind))
+			return ops;
+	}
+	return NULL;
+}
+
+static size_t rtnl_link_get_size(const struct net_device *dev)
+{
+	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+	size_t size;
+
+	if (!ops)
+		return 0;
+
+	size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+	       nlmsg_total_size(strlen(ops->kind) + 1);	 /* IFLA_INFO_KIND */
+
+	if (ops->get_size)
+		/* IFLA_INFO_DATA + nested data */
+		size += nlmsg_total_size(sizeof(struct nlattr)) +
+			ops->get_size(dev);
+
+	if (ops->get_xstats_size)
+		size += ops->get_xstats_size(dev);	/* IFLA_INFO_XSTATS */
+
+	return size;
+}
+
+static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
+{
+	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+	struct nlattr *linkinfo, *data;
+	int err = -EMSGSIZE;
+
+	linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
+	if (linkinfo == NULL)
+		goto out;
+
+	if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0)
+		goto err_cancel_link;
+	if (ops->fill_xstats) {
+		err = ops->fill_xstats(skb, dev);
+		if (err < 0)
+			goto err_cancel_link;
+	}
+	if (ops->fill_info) {
+		data = nla_nest_start(skb, IFLA_INFO_DATA);
+		if (data == NULL)
+			goto err_cancel_link;
+		err = ops->fill_info(skb, dev);
+		if (err < 0)
+			goto err_cancel_data;
+		nla_nest_end(skb, data);
+	}
+
+	nla_nest_end(skb, linkinfo);
+	return 0;
+
+err_cancel_data:
+	nla_nest_cancel(skb, data);
+err_cancel_link:
+	nla_nest_cancel(skb, linkinfo);
+out:
+	return err;
+}
+
 static const int rtm_min[RTM_NR_FAMILIES] =
 {
 	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
@@ -437,7 +574,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 	a->tx_compressed = b->tx_compressed;
 };
 
-static inline size_t if_nlmsg_size(void)
+static inline size_t if_nlmsg_size(const struct net_device *dev)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
 	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -452,7 +589,8 @@ static inline size_t if_nlmsg_size(void)
 	       + nla_total_size(4) /* IFLA_LINK */
 	       + nla_total_size(4) /* IFLA_MASTER */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
-	       + nla_total_size(1); /* IFLA_LINKMODE */
+	       + nla_total_size(1) /* IFLA_LINKMODE */
+	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
 }
 
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
@@ -522,6 +660,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		}
 	}
 
+	if (dev->rtnl_link_ops) {
+		if (rtnl_link_fill(skb, dev) < 0)
+			goto nla_put_failure;
+	}
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -553,6 +696,8 @@ cont:
 
 static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_IFNAME]		= { .type = NLA_STRING, .len = IFNAMSIZ-1 },
+	[IFLA_ADDRESS]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+	[IFLA_BROADCAST]	= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
 	[IFLA_MAP]		= { .len = sizeof(struct rtnl_link_ifmap) },
 	[IFLA_MTU]		= { .type = NLA_U32 },
 	[IFLA_TXQLEN]		= { .type = NLA_U32 },
@@ -561,10 +706,15 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKMODE]		= { .type = NLA_U8 },
 };
 
+static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
+	[IFLA_INFO_KIND]	= { .type = NLA_STRING },
+	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
+};
+
 static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
-		      struct nlattr **tb, char *ifname)
+		      struct nlattr **tb, char *ifname, int modified)
 {
-	int modified = 0, send_addr_notify = 0;
+	int send_addr_notify = 0;
 	int err;
 
 	if (tb[IFLA_MAP]) {
@@ -729,13 +879,189 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
 		goto errout_dev;
 
-	err = do_setlink(dev, ifm, tb, ifname);
+	err = do_setlink(dev, ifm, tb, ifname, 0);
 errout_dev:
 	dev_put(dev);
 errout:
 	return err;
 }
 
+static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	const struct rtnl_link_ops *ops;
+	struct net_device *dev;
+	struct ifinfomsg *ifm;
+	char ifname[IFNAMSIZ];
+	struct nlattr *tb[IFLA_MAX+1];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index > 0)
+		dev = __dev_get_by_index(ifm->ifi_index);
+	else if (tb[IFLA_IFNAME])
+		dev = __dev_get_by_name(ifname);
+	else
+		return -EINVAL;
+
+	if (!dev)
+		return -ENODEV;
+
+	ops = dev->rtnl_link_ops;
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	ops->dellink(dev);
+	return 0;
+}
+
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	const struct rtnl_link_ops *ops;
+	struct net_device *dev;
+	struct ifinfomsg *ifm;
+	char kind[MODULE_NAME_LEN];
+	char ifname[IFNAMSIZ];
+	struct nlattr *tb[IFLA_MAX+1];
+	struct nlattr *linkinfo[IFLA_INFO_MAX+1];
+	int err;
+
+replay:
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+	else
+		ifname[0] = '\0';
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index > 0)
+		dev = __dev_get_by_index(ifm->ifi_index);
+	else if (ifname[0])
+		dev = __dev_get_by_name(ifname);
+	else
+		dev = NULL;
+
+	if (tb[IFLA_LINKINFO]) {
+		err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
+				       tb[IFLA_LINKINFO], ifla_info_policy);
+		if (err < 0)
+			return err;
+	} else
+		memset(linkinfo, 0, sizeof(linkinfo));
+
+	if (linkinfo[IFLA_INFO_KIND]) {
+		nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
+		ops = rtnl_link_ops_get(kind);
+	} else {
+		kind[0] = '\0';
+		ops = NULL;
+	}
+
+	if (1) {
+		struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL;
+
+		if (ops) {
+			if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
+				err = nla_parse_nested(attr, ops->maxtype,
+						       linkinfo[IFLA_INFO_DATA],
+						       ops->policy);
+				if (err < 0)
+					return err;
+				data = attr;
+			}
+			if (ops->validate) {
+				err = ops->validate(tb, data);
+				if (err < 0)
+					return err;
+			}
+		}
+
+		if (dev) {
+			int modified = 0;
+
+			if (nlh->nlmsg_flags & NLM_F_EXCL)
+				return -EEXIST;
+			if (nlh->nlmsg_flags & NLM_F_REPLACE)
+				return -EOPNOTSUPP;
+
+			if (linkinfo[IFLA_INFO_DATA]) {
+				if (!ops || ops != dev->rtnl_link_ops ||
+				    !ops->changelink)
+					return -EOPNOTSUPP;
+
+				err = ops->changelink(dev, tb, data);
+				if (err < 0)
+					return err;
+				modified = 1;
+			}
+
+			return do_setlink(dev, ifm, tb, ifname, modified);
+		}
+
+		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+			return -ENODEV;
+
+		if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change)
+			return -EOPNOTSUPP;
+		if (tb[IFLA_ADDRESS] || tb[IFLA_BROADCAST] || tb[IFLA_MAP] ||
+		    tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
+			return -EOPNOTSUPP;
+
+		if (!ops) {
+#ifdef CONFIG_KMOD
+			if (kind[0]) {
+				__rtnl_unlock();
+				request_module("rtnl-link-%s", kind);
+				rtnl_lock();
+				ops = rtnl_link_ops_get(kind);
+				if (ops)
+					goto replay;
+			}
+#endif
+			return -EOPNOTSUPP;
+		}
+
+		if (!ifname[0])
+			snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+		dev = alloc_netdev(ops->priv_size, ifname, ops->setup);
+		if (!dev)
+			return -ENOMEM;
+
+		if (strchr(dev->name, '%')) {
+			err = dev_alloc_name(dev, dev->name);
+			if (err < 0)
+				goto err_free;
+		}
+		dev->rtnl_link_ops = ops;
+
+		if (tb[IFLA_MTU])
+			dev->mtu = nla_get_u32(tb[IFLA_MTU]);
+		if (tb[IFLA_TXQLEN])
+			dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+		if (tb[IFLA_WEIGHT])
+			dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
+		if (tb[IFLA_OPERSTATE])
+			set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+		if (tb[IFLA_LINKMODE])
+			dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+
+		err = ops->newlink(dev, tb, data);
+err_free:
+		if (err < 0)
+			free_netdev(dev);
+		return err;
+	}
+}
+
 static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct ifinfomsg *ifm;
@@ -756,7 +1082,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	} else
 		return -EINVAL;
 
-	nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL);
+	nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
 	if (nskb == NULL) {
 		err = -ENOBUFS;
 		goto errout;
@@ -806,7 +1132,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL);
+	skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
@@ -961,6 +1287,8 @@ void __init rtnetlink_init(void)
 
 	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
 	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL);
 
 	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
 	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
-- 
cgit v0.10.2


From 58651b24acfd9a6fd3b217b52e577ce34b0932af Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:04:06 -0700
Subject: [DUMMY]: Use dev->stats

Use dev->stats instead of netdev_priv().

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 60673bc..91b474c 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -38,7 +38,6 @@
 static int numdummies = 1;
 
 static int dummy_xmit(struct sk_buff *skb, struct net_device *dev);
-static struct net_device_stats *dummy_get_stats(struct net_device *dev);
 
 static int dummy_set_address(struct net_device *dev, void *p)
 {
@@ -59,7 +58,6 @@ static void set_multicast_list(struct net_device *dev)
 static void __init dummy_setup(struct net_device *dev)
 {
 	/* Initialize the device structure. */
-	dev->get_stats = dummy_get_stats;
 	dev->hard_start_xmit = dummy_xmit;
 	dev->set_multicast_list = set_multicast_list;
 	dev->set_mac_address = dummy_set_address;
@@ -76,20 +74,13 @@ static void __init dummy_setup(struct net_device *dev)
 
 static int dummy_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device_stats *stats = netdev_priv(dev);
-
-	stats->tx_packets++;
-	stats->tx_bytes+=skb->len;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
 
 	dev_kfree_skb(skb);
 	return 0;
 }
 
-static struct net_device_stats *dummy_get_stats(struct net_device *dev)
-{
-	return netdev_priv(dev);
-}
-
 static struct net_device **dummies;
 
 /* Number of dummy devices to be set up by this module. */
@@ -101,8 +92,7 @@ static int __init dummy_init_one(int index)
 	struct net_device *dev_dummy;
 	int err;
 
-	dev_dummy = alloc_netdev(sizeof(struct net_device_stats),
-				 "dummy%d", dummy_setup);
+	dev_dummy = alloc_netdev(0, "dummy%d", dummy_setup);
 
 	if (!dev_dummy)
 		return -ENOMEM;
-- 
cgit v0.10.2


From 206c9fb26f5df2ea6d440fb64159faf4d8665398 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:04:20 -0700
Subject: [DUMMY]: Keep dummy devices on list

Use a list instead of an array to allow creating new devices.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 91b474c..2f2cf3c 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -34,6 +34,12 @@
 #include <linux/etherdevice.h>
 #include <linux/init.h>
 #include <linux/moduleparam.h>
+#include <linux/rtnetlink.h>
+
+struct dummy_priv {
+	struct net_device *dev;
+	struct list_head list;
+};
 
 static int numdummies = 1;
 
@@ -81,18 +87,20 @@ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static struct net_device **dummies;
+static LIST_HEAD(dummies);
 
 /* Number of dummy devices to be set up by this module. */
 module_param(numdummies, int, 0);
 MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices");
 
-static int __init dummy_init_one(int index)
+static int __init dummy_init_one(void)
 {
 	struct net_device *dev_dummy;
+	struct dummy_priv *priv;
 	int err;
 
-	dev_dummy = alloc_netdev(0, "dummy%d", dummy_setup);
+	dev_dummy = alloc_netdev(sizeof(struct dummy_priv), "dummy%d",
+				 dummy_setup);
 
 	if (!dev_dummy)
 		return -ENOMEM;
@@ -101,40 +109,43 @@ static int __init dummy_init_one(int index)
 		free_netdev(dev_dummy);
 		dev_dummy = NULL;
 	} else {
-		dummies[index] = dev_dummy;
+		priv = netdev_priv(dev_dummy);
+		priv->dev = dev_dummy;
+		list_add_tail(&priv->list, &dummies);
 	}
 
 	return err;
 }
 
-static void dummy_free_one(int index)
+static void dummy_free_one(struct net_device *dev)
 {
-	unregister_netdev(dummies[index]);
-	free_netdev(dummies[index]);
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	list_del(&priv->list);
+	unregister_netdev(dev);
+	free_netdev(dev);
 }
 
 static int __init dummy_init_module(void)
 {
+	struct dummy_priv *priv, *next;
 	int i, err = 0;
-	dummies = kmalloc(numdummies * sizeof(void *), GFP_KERNEL);
-	if (!dummies)
-		return -ENOMEM;
+
 	for (i = 0; i < numdummies && !err; i++)
-		err = dummy_init_one(i);
+		err = dummy_init_one();
 	if (err) {
-		i--;
-		while (--i >= 0)
-			dummy_free_one(i);
+		list_for_each_entry_safe(priv, next, &dummies, list)
+			dummy_free_one(priv->dev);
 	}
 	return err;
 }
 
 static void __exit dummy_cleanup_module(void)
 {
-	int i;
-	for (i = 0; i < numdummies; i++)
-		dummy_free_one(i);
-	kfree(dummies);
+	struct dummy_priv *priv, *next;
+
+	list_for_each_entry_safe(priv, next, &dummies, list)
+		dummy_free_one(priv->dev);
 }
 
 module_init(dummy_init_module);
-- 
cgit v0.10.2


From 5d5cb173d85ebf6dfb16f456a8148ecb4b1cecbc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:04:34 -0700
Subject: [DUMMY]: Use rtnl_link API

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 2f2cf3c..91126b9 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/moduleparam.h>
 #include <linux/rtnetlink.h>
+#include <net/rtnetlink.h>
 
 struct dummy_priv {
 	struct net_device *dev;
@@ -61,12 +62,13 @@ static void set_multicast_list(struct net_device *dev)
 {
 }
 
-static void __init dummy_setup(struct net_device *dev)
+static void dummy_setup(struct net_device *dev)
 {
 	/* Initialize the device structure. */
 	dev->hard_start_xmit = dummy_xmit;
 	dev->set_multicast_list = set_multicast_list;
 	dev->set_mac_address = dummy_set_address;
+	dev->destructor = free_netdev;
 
 	/* Fill in device structure with ethernet-generic values. */
 	ether_setup(dev);
@@ -89,6 +91,37 @@ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static LIST_HEAD(dummies);
 
+static int dummy_newlink(struct net_device *dev,
+			 struct nlattr *tb[], struct nlattr *data[])
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = register_netdevice(dev);
+	if (err < 0)
+		return err;
+
+	priv->dev = dev;
+	list_add_tail(&priv->list, &dummies);
+	return 0;
+}
+
+static void dummy_dellink(struct net_device *dev)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	list_del(&priv->list);
+	unregister_netdevice(dev);
+}
+
+static struct rtnl_link_ops dummy_link_ops __read_mostly = {
+	.kind		= "dummy",
+	.priv_size	= sizeof(struct dummy_priv),
+	.setup		= dummy_setup,
+	.newlink	= dummy_newlink,
+	.dellink	= dummy_dellink,
+};
+
 /* Number of dummy devices to be set up by this module. */
 module_param(numdummies, int, 0);
 MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices");
@@ -105,25 +138,23 @@ static int __init dummy_init_one(void)
 	if (!dev_dummy)
 		return -ENOMEM;
 
-	if ((err = register_netdev(dev_dummy))) {
-		free_netdev(dev_dummy);
-		dev_dummy = NULL;
-	} else {
-		priv = netdev_priv(dev_dummy);
-		priv->dev = dev_dummy;
-		list_add_tail(&priv->list, &dummies);
-	}
+	err = dev_alloc_name(dev_dummy, dev_dummy->name);
+	if (err < 0)
+		goto err;
 
-	return err;
-}
+	dev_dummy->rtnl_link_ops = &dummy_link_ops;
+	err = register_netdevice(dev_dummy);
+	if (err < 0)
+		goto err;
 
-static void dummy_free_one(struct net_device *dev)
-{
-	struct dummy_priv *priv = netdev_priv(dev);
+	priv = netdev_priv(dev_dummy);
+	priv->dev = dev_dummy;
+	list_add_tail(&priv->list, &dummies);
+	return 0;
 
-	list_del(&priv->list);
-	unregister_netdev(dev);
-	free_netdev(dev);
+err:
+	free_netdev(dev_dummy);
+	return err;
 }
 
 static int __init dummy_init_module(void)
@@ -131,12 +162,18 @@ static int __init dummy_init_module(void)
 	struct dummy_priv *priv, *next;
 	int i, err = 0;
 
+	rtnl_lock();
+	err = __rtnl_link_register(&dummy_link_ops);
+
 	for (i = 0; i < numdummies && !err; i++)
 		err = dummy_init_one();
-	if (err) {
+	if (err < 0) {
 		list_for_each_entry_safe(priv, next, &dummies, list)
-			dummy_free_one(priv->dev);
+			dummy_dellink(priv->dev);
+		__rtnl_link_unregister(&dummy_link_ops);
 	}
+	rtnl_unlock();
+
 	return err;
 }
 
@@ -144,10 +181,15 @@ static void __exit dummy_cleanup_module(void)
 {
 	struct dummy_priv *priv, *next;
 
+	rtnl_lock();
 	list_for_each_entry_safe(priv, next, &dummies, list)
-		dummy_free_one(priv->dev);
+		dummy_dellink(priv->dev);
+
+	__rtnl_link_unregister(&dummy_link_ops);
+	rtnl_unlock();
 }
 
 module_init(dummy_init_module);
 module_exit(dummy_cleanup_module);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("dummy");
-- 
cgit v0.10.2


From 62b7ffcaaa4e91ed547fc55758076ac536bd5571 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:04:51 -0700
Subject: [IFB]: Keep ifb devices on list

Use a list instead of an array to allow creating new devices.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 07b4c0d..819945e 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -33,12 +33,15 @@
 #include <linux/etherdevice.h>
 #include <linux/init.h>
 #include <linux/moduleparam.h>
+#include <linux/list.h>
 #include <net/pkt_sched.h>
 
 #define TX_TIMEOUT  (2*HZ)
 
 #define TX_Q_LIMIT    32
 struct ifb_private {
+	struct list_head	list;
+	struct net_device	*dev;
 	struct net_device_stats stats;
 	struct tasklet_struct   ifb_tasklet;
 	int     tasklet_pending;
@@ -197,7 +200,7 @@ static struct net_device_stats *ifb_get_stats(struct net_device *dev)
 	return stats;
 }
 
-static struct net_device **ifbs;
+static LIST_HEAD(ifbs);
 
 /* Number of ifb devices to be set up by this module. */
 module_param(numifbs, int, 0);
@@ -229,6 +232,7 @@ static int ifb_open(struct net_device *dev)
 static int __init ifb_init_one(int index)
 {
 	struct net_device *dev_ifb;
+	struct ifb_private *priv;
 	int err;
 
 	dev_ifb = alloc_netdev(sizeof(struct ifb_private),
@@ -241,30 +245,33 @@ static int __init ifb_init_one(int index)
 		free_netdev(dev_ifb);
 		dev_ifb = NULL;
 	} else {
-		ifbs[index] = dev_ifb;
+		priv = netdev_priv(dev_ifb);
+		priv->dev = dev_ifb;
+		list_add_tail(&priv->list, &ifbs);
 	}
 
 	return err;
 }
 
-static void ifb_free_one(int index)
+static void ifb_free_one(struct net_device *dev)
 {
-	unregister_netdev(ifbs[index]);
-	free_netdev(ifbs[index]);
+	struct ifb_private *priv = netdev_priv(dev);
+
+	list_del(&priv->list);
+	unregister_netdev(dev);
+	free_netdev(dev);
 }
 
 static int __init ifb_init_module(void)
 {
+	struct ifb_private *priv, *next;
 	int i, err = 0;
-	ifbs = kmalloc(numifbs * sizeof(void *), GFP_KERNEL);
-	if (!ifbs)
-		return -ENOMEM;
+
 	for (i = 0; i < numifbs && !err; i++)
 		err = ifb_init_one(i);
 	if (err) {
-		i--;
-		while (--i >= 0)
-			ifb_free_one(i);
+		list_for_each_entry_safe(priv, next, &ifbs, list)
+			ifb_free_one(priv->dev);
 	}
 
 	return err;
@@ -272,11 +279,10 @@ static int __init ifb_init_module(void)
 
 static void __exit ifb_cleanup_module(void)
 {
-	int i;
+	struct ifb_private *priv, *next;
 
-	for (i = 0; i < numifbs; i++)
-		ifb_free_one(i);
-	kfree(ifbs);
+	list_for_each_entry_safe(priv, next, &ifbs, list)
+		ifb_free_one(priv->dev);
 }
 
 module_init(ifb_init_module);
-- 
cgit v0.10.2


From 9ba2cd656021e7f70038ba9d551224e04d0bfcef Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:05:06 -0700
Subject: [IFB]: Use rtnl_link API

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 819945e..669ee1a 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -139,13 +139,14 @@ resched:
 
 }
 
-static void __init ifb_setup(struct net_device *dev)
+static void ifb_setup(struct net_device *dev)
 {
 	/* Initialize the device structure. */
 	dev->get_stats = ifb_get_stats;
 	dev->hard_start_xmit = ifb_xmit;
 	dev->open = &ifb_open;
 	dev->stop = &ifb_close;
+	dev->destructor = free_netdev;
 
 	/* Fill in device structure with ethernet-generic values. */
 	ether_setup(dev);
@@ -229,6 +230,37 @@ static int ifb_open(struct net_device *dev)
 	return 0;
 }
 
+static int ifb_newlink(struct net_device *dev,
+		       struct nlattr *tb[], struct nlattr *data[])
+{
+	struct ifb_private *priv = netdev_priv(dev);
+	int err;
+
+	err = register_netdevice(dev);
+	if (err < 0)
+		return err;
+
+	priv->dev = dev;
+	list_add_tail(&priv->list, &ifbs);
+	return 0;
+}
+
+static void ifb_dellink(struct net_device *dev)
+{
+	struct ifb_private *priv = netdev_priv(dev);
+
+	list_del(&priv->list);
+	unregister_netdevice(dev);
+}
+
+static struct rtnl_link_ops ifb_link_ops __read_mostly = {
+	.kind		= "ifb",
+	.priv_size	= sizeof(struct ifb_private),
+	.setup		= ifb_setup,
+	.newlink	= ifb_newlink,
+	.dellink	= ifb_dellink,
+};
+
 static int __init ifb_init_one(int index)
 {
 	struct net_device *dev_ifb;
@@ -241,38 +273,41 @@ static int __init ifb_init_one(int index)
 	if (!dev_ifb)
 		return -ENOMEM;
 
-	if ((err = register_netdev(dev_ifb))) {
-		free_netdev(dev_ifb);
-		dev_ifb = NULL;
-	} else {
-		priv = netdev_priv(dev_ifb);
-		priv->dev = dev_ifb;
-		list_add_tail(&priv->list, &ifbs);
-	}
+	err = dev_alloc_name(dev_ifb, dev_ifb->name);
+	if (err < 0)
+		goto err;
 
-	return err;
-}
+	dev_ifb->rtnl_link_ops = &ifb_link_ops;
+	err = register_netdevice(dev_ifb);
+	if (err < 0)
+		goto err;
 
-static void ifb_free_one(struct net_device *dev)
-{
-	struct ifb_private *priv = netdev_priv(dev);
+	priv = netdev_priv(dev_ifb);
+	priv->dev = dev_ifb;
+	list_add_tail(&priv->list, &ifbs);
+	return 0;
 
-	list_del(&priv->list);
-	unregister_netdev(dev);
-	free_netdev(dev);
+err:
+	free_netdev(dev_ifb);
+	return err;
 }
 
 static int __init ifb_init_module(void)
 {
 	struct ifb_private *priv, *next;
-	int i, err = 0;
+	int i, err;
+
+	rtnl_lock();
+	err = __rtnl_link_register(&ifb_link_ops);
 
 	for (i = 0; i < numifbs && !err; i++)
 		err = ifb_init_one(i);
 	if (err) {
 		list_for_each_entry_safe(priv, next, &ifbs, list)
-			ifb_free_one(priv->dev);
+			ifb_dellink(priv->dev);
+		__rtnl_link_unregister(&ifb_link_ops);
 	}
+	rtnl_unlock();
 
 	return err;
 }
@@ -281,11 +316,16 @@ static void __exit ifb_cleanup_module(void)
 {
 	struct ifb_private *priv, *next;
 
+	rtnl_lock();
 	list_for_each_entry_safe(priv, next, &ifbs, list)
-		ifb_free_one(priv->dev);
+		ifb_dellink(priv->dev);
+
+	__rtnl_link_unregister(&ifb_link_ops);
+	rtnl_unlock();
 }
 
 module_init(ifb_init_module);
 module_exit(ifb_cleanup_module);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jamal Hadi Salim");
+MODULE_ALIAS_RTNL_LINK("ifb");
-- 
cgit v0.10.2


From c17d8874f9959070552fddf1b4e1d73c0c144c0f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:05:22 -0700
Subject: [VLAN]: Convert name-based configuration functions to struct
 netdevice *

Move the device lookup and checks to the ioctl handler under the RTNL and
change all name-based interfaces to take a struct net_device * instead.

This allows them to be used from a netlink interface, which identifies devices
by ifindex rather than by name. It also avoids races between the ioctl
interface and the (upcoming) netlink interface, since all changes now happen
under the RTNL.

As a nice side effect, this greatly simplifies error handling in the helper
functions and fixes a number of incorrect error codes, such as -EINVAL being
returned when the device is not found.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index de78c9d..3678f07 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -278,43 +278,16 @@ static int unregister_vlan_dev(struct net_device *real_dev,
 	return ret;
 }
 
-static int unregister_vlan_device(const char *vlan_IF_name)
+static int unregister_vlan_device(struct net_device *dev)
 {
-	struct net_device *dev = NULL;
 	int ret;
 
+	ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
+				  VLAN_DEV_INFO(dev)->vlan_id);
+	unregister_netdevice(dev);
 
-	dev = dev_get_by_name(vlan_IF_name);
-	ret = -EINVAL;
-	if (dev) {
-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
-			rtnl_lock();
-
-			ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
-						  VLAN_DEV_INFO(dev)->vlan_id);
-
-			dev_put(dev);
-			unregister_netdevice(dev);
-
-			rtnl_unlock();
-
-			if (ret == 1)
-				ret = 0;
-		} else {
-			printk(VLAN_ERR
-			       "%s: ERROR:	Tried to remove a non-vlan device "
-			       "with VLAN code, name: %s  priv_flags: %hX\n",
-			       __FUNCTION__, dev->name, dev->priv_flags);
-			dev_put(dev);
-			ret = -EPERM;
-		}
-	} else {
-#ifdef VLAN_DEBUG
-		printk(VLAN_DBG "%s: WARNING: Could not find dev.\n", __FUNCTION__);
-#endif
-		ret = -EINVAL;
-	}
-
+	if (ret == 1)
+		ret = 0;
 	return ret;
 }
 
@@ -378,12 +351,11 @@ static struct lock_class_key vlan_netdev_xmit_lock_key;
  *  Returns the device that was created, or NULL if there was
  *  an error of some kind.
  */
-static struct net_device *register_vlan_device(const char *eth_IF_name,
+static struct net_device *register_vlan_device(struct net_device *real_dev,
 					       unsigned short VLAN_ID)
 {
 	struct vlan_group *grp;
 	struct net_device *new_dev;
-	struct net_device *real_dev; /* the ethernet device */
 	char name[IFNAMSIZ];
 	int i;
 
@@ -395,46 +367,36 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
 	if (VLAN_ID >= VLAN_VID_MASK)
 		goto out_ret_null;
 
-	/* find the device relating to eth_IF_name. */
-	real_dev = dev_get_by_name(eth_IF_name);
-	if (!real_dev)
-		goto out_ret_null;
-
 	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
 		printk(VLAN_DBG "%s: VLANs not supported on %s.\n",
 			__FUNCTION__, real_dev->name);
-		goto out_put_dev;
+		goto out_ret_null;
 	}
 
 	if ((real_dev->features & NETIF_F_HW_VLAN_RX) &&
 	    !real_dev->vlan_rx_register) {
 		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
 			__FUNCTION__, real_dev->name);
-		goto out_put_dev;
+		goto out_ret_null;
 	}
 
 	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
 	    (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) {
 		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
 			__FUNCTION__, real_dev->name);
-		goto out_put_dev;
+		goto out_ret_null;
 	}
 
-	/* From this point on, all the data structures must remain
-	 * consistent.
-	 */
-	rtnl_lock();
-
 	/* The real device must be up and operating in order to
 	 * assosciate a VLAN device with it.
 	 */
 	if (!(real_dev->flags & IFF_UP))
-		goto out_unlock;
+		goto out_ret_null;
 
 	if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) {
 		/* was already registered. */
 		printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__);
-		goto out_unlock;
+		goto out_ret_null;
 	}
 
 	/* Gotta set up the fields for the device. */
@@ -471,7 +433,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
 			       vlan_setup);
 
 	if (new_dev == NULL)
-		goto out_unlock;
+		goto out_ret_null;
 
 #ifdef VLAN_DEBUG
 	printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
@@ -577,9 +539,8 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
 	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
 		real_dev->vlan_rx_add_vid(real_dev, VLAN_ID);
 
-	rtnl_unlock();
-
-
+	/* Account for reference in struct vlan_dev_info */
+	dev_hold(real_dev);
 #ifdef VLAN_DEBUG
 	printk(VLAN_DBG "Allocated new device successfully, returning.\n");
 #endif
@@ -590,17 +551,11 @@ out_free_arrays:
 
 out_free_unregister:
 	unregister_netdev(new_dev);
-	goto out_unlock;
+	goto out_ret_null;
 
 out_free_newdev:
 	free_netdev(new_dev);
 
-out_unlock:
-	rtnl_unlock();
-
-out_put_dev:
-	dev_put(real_dev);
-
 out_ret_null:
 	return NULL;
 }
@@ -693,9 +648,10 @@ out:
  */
 static int vlan_ioctl_handler(void __user *arg)
 {
-	int err = 0;
+	int err;
 	unsigned short vid = 0;
 	struct vlan_ioctl_args args;
+	struct net_device *dev = NULL;
 
 	if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args)))
 		return -EFAULT;
@@ -708,35 +664,61 @@ static int vlan_ioctl_handler(void __user *arg)
 	printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd);
 #endif
 
+	rtnl_lock();
+
 	switch (args.cmd) {
 	case SET_VLAN_INGRESS_PRIORITY_CMD:
+	case SET_VLAN_EGRESS_PRIORITY_CMD:
+	case SET_VLAN_FLAG_CMD:
+	case ADD_VLAN_CMD:
+	case DEL_VLAN_CMD:
+	case GET_VLAN_REALDEV_NAME_CMD:
+	case GET_VLAN_VID_CMD:
+		err = -ENODEV;
+		dev = __dev_get_by_name(args.device1);
+		if (!dev)
+			goto out;
+
+		err = -EINVAL;
+		if (args.cmd != ADD_VLAN_CMD &&
+		    !(dev->priv_flags & IFF_802_1Q_VLAN))
+			goto out;
+	}
+
+	switch (args.cmd) {
+	case SET_VLAN_INGRESS_PRIORITY_CMD:
+		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		err = vlan_dev_set_ingress_priority(args.device1,
-						    args.u.skb_priority,
-						    args.vlan_qos);
+			break;
+		vlan_dev_set_ingress_priority(dev,
+					      args.u.skb_priority,
+					      args.vlan_qos);
 		break;
 
 	case SET_VLAN_EGRESS_PRIORITY_CMD:
+		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		err = vlan_dev_set_egress_priority(args.device1,
+			break;
+		err = vlan_dev_set_egress_priority(dev,
 						   args.u.skb_priority,
 						   args.vlan_qos);
 		break;
 
 	case SET_VLAN_FLAG_CMD:
+		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		err = vlan_dev_set_vlan_flag(args.device1,
+			break;
+		err = vlan_dev_set_vlan_flag(dev,
 					     args.u.flag,
 					     args.vlan_qos);
 		break;
 
 	case SET_VLAN_NAME_TYPE_CMD:
+		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-		if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) {
+		if ((args.u.name_type >= 0) &&
+		    (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
 			vlan_name_type = args.u.name_type;
 			err = 0;
 		} else {
@@ -745,13 +727,10 @@ static int vlan_ioctl_handler(void __user *arg)
 		break;
 
 	case ADD_VLAN_CMD:
+		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		/* we have been given the name of the Ethernet Device we want to
-		 * talk to:  args.dev1	 We also have the
-		 * VLAN ID:  args.u.VID
-		 */
-		if (register_vlan_device(args.device1, args.u.VID)) {
+			break;
+		if (register_vlan_device(dev, args.u.VID)) {
 			err = 0;
 		} else {
 			err = -EINVAL;
@@ -759,12 +738,10 @@ static int vlan_ioctl_handler(void __user *arg)
 		break;
 
 	case DEL_VLAN_CMD:
+		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		/* Here, the args.dev1 is the actual VLAN we want
-		 * to get rid of.
-		 */
-		err = unregister_vlan_device(args.device1);
+			break;
+		err = unregister_vlan_device(dev);
 		break;
 
 	case GET_VLAN_INGRESS_PRIORITY_CMD:
@@ -788,9 +765,7 @@ static int vlan_ioctl_handler(void __user *arg)
 		err = -EINVAL;
 		break;
 	case GET_VLAN_REALDEV_NAME_CMD:
-		err = vlan_dev_get_realdev_name(args.device1, args.u.device2);
-		if (err)
-			goto out;
+		vlan_dev_get_realdev_name(dev, args.u.device2);
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args))) {
 			err = -EFAULT;
@@ -798,9 +773,7 @@ static int vlan_ioctl_handler(void __user *arg)
 		break;
 
 	case GET_VLAN_VID_CMD:
-		err = vlan_dev_get_vid(args.device1, &vid);
-		if (err)
-			goto out;
+		vlan_dev_get_vid(dev, &vid);
 		args.u.VID = vid;
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args))) {
@@ -812,9 +785,11 @@ static int vlan_ioctl_handler(void __user *arg)
 		/* pass on to underlying device instead?? */
 		printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
 			__FUNCTION__, args.cmd);
-		return -EINVAL;
+		err = -EINVAL;
+		break;
 	}
 out:
+	rtnl_unlock();
 	return err;
 }
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 1976cdb..b837390 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -62,11 +62,14 @@ int vlan_dev_set_mac_address(struct net_device *dev, void* addr);
 int vlan_dev_open(struct net_device* dev);
 int vlan_dev_stop(struct net_device* dev);
 int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd);
-int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio);
-int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio);
-int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val);
-int vlan_dev_get_realdev_name(const char* dev_name, char* result);
-int vlan_dev_get_vid(const char* dev_name, unsigned short* result);
+void vlan_dev_set_ingress_priority(const struct net_device *dev,
+				   u32 skb_prio, short vlan_prio);
+int vlan_dev_set_egress_priority(const struct net_device *dev,
+				 u32 skb_prio, short vlan_prio);
+int vlan_dev_set_vlan_flag(const struct net_device *dev,
+			   u32 flag, short flag_val);
+void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
+void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
 void vlan_dev_set_multicast_list(struct net_device *vlan_dev);
 
 #endif /* !(__BEN_VLAN_802_1Q_INC__) */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ec46084..05a2360 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -534,136 +534,68 @@ int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio)
+void vlan_dev_set_ingress_priority(const struct net_device *dev,
+				   u32 skb_prio, short vlan_prio)
 {
-	struct net_device *dev = dev_get_by_name(dev_name);
-
-	if (dev) {
-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
-			/* see if a priority mapping exists.. */
-			VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
-			dev_put(dev);
-			return 0;
-		}
-
-		dev_put(dev);
-	}
-	return -EINVAL;
+	VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
 }
 
-int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio)
+int vlan_dev_set_egress_priority(const struct net_device *dev,
+				 u32 skb_prio, short vlan_prio)
 {
-	struct net_device *dev = dev_get_by_name(dev_name);
 	struct vlan_priority_tci_mapping *mp = NULL;
 	struct vlan_priority_tci_mapping *np;
 
-	if (dev) {
-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
-			/* See if a priority mapping exists.. */
-			mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
-			while (mp) {
-				if (mp->priority == skb_prio) {
-					mp->vlan_qos = ((vlan_prio << 13) & 0xE000);
-					dev_put(dev);
-					return 0;
-				}
-				mp = mp->next;
-			}
-
-			/* Create a new mapping then. */
-			mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
-			np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL);
-			if (np) {
-				np->next = mp;
-				np->priority = skb_prio;
-				np->vlan_qos = ((vlan_prio << 13) & 0xE000);
-				VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np;
-				dev_put(dev);
-				return 0;
-			} else {
-				dev_put(dev);
-				return -ENOBUFS;
-			}
+	/* See if a priority mapping exists.. */
+	mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
+	while (mp) {
+		if (mp->priority == skb_prio) {
+			mp->vlan_qos = ((vlan_prio << 13) & 0xE000);
+			return 0;
 		}
-		dev_put(dev);
+		mp = mp->next;
 	}
-	return -EINVAL;
+
+	/* Create a new mapping then. */
+	mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
+	np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL);
+	if (!np)
+		return -ENOBUFS;
+
+	np->next = mp;
+	np->priority = skb_prio;
+	np->vlan_qos = ((vlan_prio << 13) & 0xE000);
+	VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np;
+	return 0;
 }
 
 /* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */
-int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val)
+int vlan_dev_set_vlan_flag(const struct net_device *dev,
+			   u32 flag, short flag_val)
 {
-	struct net_device *dev = dev_get_by_name(dev_name);
-
-	if (dev) {
-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
-			/* verify flag is supported */
-			if (flag == 1) {
-				if (flag_val) {
-					VLAN_DEV_INFO(dev)->flags |= 1;
-				} else {
-					VLAN_DEV_INFO(dev)->flags &= ~1;
-				}
-				dev_put(dev);
-				return 0;
-			} else {
-				printk(KERN_ERR  "%s: flag %i is not valid.\n",
-					__FUNCTION__, (int)(flag));
-				dev_put(dev);
-				return -EINVAL;
-			}
+	/* verify flag is supported */
+	if (flag == 1) {
+		if (flag_val) {
+			VLAN_DEV_INFO(dev)->flags |= 1;
 		} else {
-			printk(KERN_ERR
-			       "%s: %s is not a vlan device, priv_flags: %hX.\n",
-			       __FUNCTION__, dev->name, dev->priv_flags);
-			dev_put(dev);
+			VLAN_DEV_INFO(dev)->flags &= ~1;
 		}
-	} else {
-		printk(KERN_ERR  "%s: Could not find device: %s\n",
-			__FUNCTION__, dev_name);
+		return 0;
 	}
-
+	printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag);
 	return -EINVAL;
 }
 
-
-int vlan_dev_get_realdev_name(const char *dev_name, char* result)
+void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
 {
-	struct net_device *dev = dev_get_by_name(dev_name);
-	int rv = 0;
-	if (dev) {
-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
-			strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23);
-			rv = 0;
-		} else {
-			rv = -EINVAL;
-		}
-		dev_put(dev);
-	} else {
-		rv = -ENODEV;
-	}
-	return rv;
+	strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23);
 }
 
-int vlan_dev_get_vid(const char *dev_name, unsigned short* result)
+void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
 {
-	struct net_device *dev = dev_get_by_name(dev_name);
-	int rv = 0;
-	if (dev) {
-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
-			*result = VLAN_DEV_INFO(dev)->vlan_id;
-			rv = 0;
-		} else {
-			rv = -EINVAL;
-		}
-		dev_put(dev);
-	} else {
-		rv = -ENODEV;
-	}
-	return rv;
+	*result = VLAN_DEV_INFO(dev)->vlan_id;
 }
 
-
 int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p)
 {
 	struct sockaddr *addr = (struct sockaddr *)(addr_struct_p);
-- 
cgit v0.10.2


From 2f4284a406cb25d1e41454cbf9ec4545b5ed70a1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:05:41 -0700
Subject: [VLAN]: Move some device initialization code to dev->init callback

Move some device initialization code to new dev->init callback to make
it shareable with netlink. Additionally this fixes a minor bug, dev->iflink
is set after registration, which causes an incorrect value in the initial
netlink message.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 3678f07..dc95f7c 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -291,6 +291,48 @@ static int unregister_vlan_device(struct net_device *dev)
 	return ret;
 }
 
+/*
+ * vlan network devices have devices nesting below it, and are a special
+ * "super class" of normal network devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key vlan_netdev_xmit_lock_key;
+
+static int vlan_dev_init(struct net_device *dev)
+{
+	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+
+	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
+	dev->flags  = real_dev->flags & ~IFF_UP;
+	dev->iflink = real_dev->ifindex;
+	dev->state  = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
+					  (1<<__LINK_STATE_DORMANT))) |
+		      (1<<__LINK_STATE_PRESENT);
+
+	/* TODO: maybe just assign it to be ETHERNET? */
+	dev->type = real_dev->type;
+
+	memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len);
+	memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len);
+	dev->addr_len = real_dev->addr_len;
+
+	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+		dev->hard_header     = real_dev->hard_header;
+		dev->hard_header_len = real_dev->hard_header_len;
+		dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
+		dev->rebuild_header  = real_dev->rebuild_header;
+	} else {
+		dev->hard_header     = vlan_dev_hard_header;
+		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
+		dev->hard_start_xmit = vlan_dev_hard_start_xmit;
+		dev->rebuild_header  = vlan_dev_rebuild_header;
+	}
+	dev->hard_header_parse = real_dev->hard_header_parse;
+
+	lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
+	return 0;
+}
+
 static void vlan_setup(struct net_device *new_dev)
 {
 	SET_MODULE_OWNER(new_dev);
@@ -311,6 +353,7 @@ static void vlan_setup(struct net_device *new_dev)
 
 	/* set up method calls */
 	new_dev->change_mtu = vlan_dev_change_mtu;
+	new_dev->init = vlan_dev_init;
 	new_dev->open = vlan_dev_open;
 	new_dev->stop = vlan_dev_stop;
 	new_dev->set_mac_address = vlan_dev_set_mac_address;
@@ -339,14 +382,6 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
 	}
 }
 
-/*
- * vlan network devices have devices nesting below it, and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key vlan_netdev_xmit_lock_key;
-
-
 /*  Attach a VLAN device to a mac address (ie Ethernet Card).
  *  Returns the device that was created, or NULL if there was
  *  an error of some kind.
@@ -435,49 +470,17 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 	if (new_dev == NULL)
 		goto out_ret_null;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
-#endif
-	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
-	new_dev->flags = real_dev->flags;
-	new_dev->flags &= ~IFF_UP;
-
-	new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
-					     (1<<__LINK_STATE_DORMANT))) |
-			 (1<<__LINK_STATE_PRESENT);
-
 	/* need 4 bytes for extra VLAN header info,
 	 * hope the underlying device can handle it.
 	 */
 	new_dev->mtu = real_dev->mtu;
 
-	/* TODO: maybe just assign it to be ETHERNET? */
-	new_dev->type = real_dev->type;
-
-	new_dev->hard_header_len = real_dev->hard_header_len;
-	if (!(real_dev->features & NETIF_F_HW_VLAN_TX)) {
-		/* Regular ethernet + 4 bytes (18 total). */
-		new_dev->hard_header_len += VLAN_HLEN;
-	}
-
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
 	VLAN_MEM_DBG("new_dev->priv malloc, addr: %p  size: %i\n",
 		     new_dev->priv,
 		     sizeof(struct vlan_dev_info));
-
-	memcpy(new_dev->broadcast, real_dev->broadcast, real_dev->addr_len);
-	memcpy(new_dev->dev_addr, real_dev->dev_addr, real_dev->addr_len);
-	new_dev->addr_len = real_dev->addr_len;
-
-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
-		new_dev->hard_header = real_dev->hard_header;
-		new_dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
-		new_dev->rebuild_header = real_dev->rebuild_header;
-	} else {
-		new_dev->hard_header = vlan_dev_hard_header;
-		new_dev->hard_start_xmit = vlan_dev_hard_start_xmit;
-		new_dev->rebuild_header = vlan_dev_rebuild_header;
-	}
-	new_dev->hard_header_parse = real_dev->hard_header_parse;
+#endif
 
 	VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
 	VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
@@ -492,9 +495,6 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 	if (register_netdevice(new_dev))
 		goto out_free_newdev;
 
-	lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
-
-	new_dev->iflink = real_dev->ifindex;
 	vlan_transfer_operstate(real_dev, new_dev);
 	linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
 
-- 
cgit v0.10.2


From 42429aaee5eb44f4a48fdb056d77d0c06ef5aebc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:05:59 -0700
Subject: [VLAN]: Move vlan_group allocation to separate function

Move group allocation to a separate function to clean up the code a bit
and allocate groups before registering the device. Device registration
is globally visible and causes netlink events, so we shouldn't fail
afterwards.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index dc95f7c..1b9dc5e 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -197,6 +197,34 @@ static void vlan_group_free(struct vlan_group *grp)
 	kfree(grp);
 }
 
+static struct vlan_group *vlan_group_alloc(int ifindex)
+{
+	struct vlan_group *grp;
+	unsigned int size;
+	unsigned int i;
+
+	grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
+	if (!grp)
+		return NULL;
+
+	size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
+
+	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) {
+		grp->vlan_devices_arrays[i] = kzalloc(size, GFP_KERNEL);
+		if (!grp->vlan_devices_arrays[i])
+			goto err;
+	}
+
+	grp->real_dev_ifindex = ifindex;
+	hlist_add_head_rcu(&grp->hlist,
+			   &vlan_group_hash[vlan_grp_hashfn(ifindex)]);
+	return grp;
+
+err:
+	vlan_group_free(grp);
+	return NULL;
+}
+
 static void vlan_rcu_free(struct rcu_head *rcu)
 {
 	vlan_group_free(container_of(rcu, struct vlan_group, rcu));
@@ -389,10 +417,9 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
 static struct net_device *register_vlan_device(struct net_device *real_dev,
 					       unsigned short VLAN_ID)
 {
-	struct vlan_group *grp;
+	struct vlan_group *grp, *ngrp = NULL;
 	struct net_device *new_dev;
 	char name[IFNAMSIZ];
-	int i;
 
 #ifdef VLAN_DEBUG
 	printk(VLAN_DBG "%s: if_name -:%s:-	vid: %i\n",
@@ -491,9 +518,15 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 	printk(VLAN_DBG "About to go find the group for idx: %i\n",
 	       real_dev->ifindex);
 #endif
+	grp = __vlan_find_group(real_dev->ifindex);
+	if (!grp) {
+		ngrp = grp = vlan_group_alloc(real_dev->ifindex);
+		if (!grp)
+			goto out_free_newdev;
+	}
 
 	if (register_netdevice(new_dev))
-		goto out_free_newdev;
+		goto out_free_group;
 
 	vlan_transfer_operstate(real_dev, new_dev);
 	linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
@@ -501,34 +534,8 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
-	grp = __vlan_find_group(real_dev->ifindex);
-
-	/* Note, we are running under the RTNL semaphore
-	 * so it cannot "appear" on us.
-	 */
-	if (!grp) { /* need to add a new group */
-		grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
-		if (!grp)
-			goto out_free_unregister;
-
-		for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) {
-			grp->vlan_devices_arrays[i] = kzalloc(
-				sizeof(struct net_device *)*VLAN_GROUP_ARRAY_PART_LEN,
-				GFP_KERNEL);
-
-			if (!grp->vlan_devices_arrays[i])
-				goto out_free_arrays;
-		}
-
-		/* printk(KERN_ALERT "VLAN REGISTER:  Allocated new group.\n"); */
-		grp->real_dev_ifindex = real_dev->ifindex;
-
-		hlist_add_head_rcu(&grp->hlist,
-				   &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
-
-		if (real_dev->features & NETIF_F_HW_VLAN_RX)
-			real_dev->vlan_rx_register(real_dev, grp);
-	}
+	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
+		real_dev->vlan_rx_register(real_dev, ngrp);
 
 	vlan_group_set_device(grp, VLAN_ID, new_dev);
 
@@ -546,12 +553,9 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 #endif
 	return new_dev;
 
-out_free_arrays:
-	vlan_group_free(grp);
-
-out_free_unregister:
-	unregister_netdev(new_dev);
-	goto out_ret_null;
+out_free_group:
+	if (ngrp)
+		vlan_group_free(ngrp);
 
 out_free_newdev:
 	free_netdev(new_dev);
-- 
cgit v0.10.2


From c1d3ee9925ca714a5ed3f8fce01a7027137f4e3f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:06:14 -0700
Subject: [VLAN]: Split up device checks

Move the checks of the underlying device to a separate function.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 1b9dc5e..1e33dbb 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -410,57 +410,65 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
 	}
 }
 
-/*  Attach a VLAN device to a mac address (ie Ethernet Card).
- *  Returns the device that was created, or NULL if there was
- *  an error of some kind.
- */
-static struct net_device *register_vlan_device(struct net_device *real_dev,
-					       unsigned short VLAN_ID)
+static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
 {
-	struct vlan_group *grp, *ngrp = NULL;
-	struct net_device *new_dev;
-	char name[IFNAMSIZ];
-
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: if_name -:%s:-	vid: %i\n",
-		__FUNCTION__, eth_IF_name, VLAN_ID);
-#endif
-
-	if (VLAN_ID >= VLAN_VID_MASK)
-		goto out_ret_null;
-
 	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
 		printk(VLAN_DBG "%s: VLANs not supported on %s.\n",
 			__FUNCTION__, real_dev->name);
-		goto out_ret_null;
+		return -EOPNOTSUPP;
 	}
 
 	if ((real_dev->features & NETIF_F_HW_VLAN_RX) &&
 	    !real_dev->vlan_rx_register) {
 		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
 			__FUNCTION__, real_dev->name);
-		goto out_ret_null;
+		return -EOPNOTSUPP;
 	}
 
 	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
 	    (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) {
 		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
 			__FUNCTION__, real_dev->name);
-		goto out_ret_null;
+		return -EOPNOTSUPP;
 	}
 
 	/* The real device must be up and operating in order to
 	 * assosciate a VLAN device with it.
 	 */
 	if (!(real_dev->flags & IFF_UP))
-		goto out_ret_null;
+		return -ENETDOWN;
 
-	if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) {
+	if (__find_vlan_dev(real_dev, vlan_id) != NULL) {
 		/* was already registered. */
 		printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__);
-		goto out_ret_null;
+		return -EEXIST;
 	}
 
+	return 0;
+}
+
+/*  Attach a VLAN device to a mac address (ie Ethernet Card).
+ *  Returns the device that was created, or NULL if there was
+ *  an error of some kind.
+ */
+static struct net_device *register_vlan_device(struct net_device *real_dev,
+					       unsigned short VLAN_ID)
+{
+	struct vlan_group *grp, *ngrp = NULL;
+	struct net_device *new_dev;
+	char name[IFNAMSIZ];
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "%s: if_name -:%s:-	vid: %i\n",
+		__FUNCTION__, eth_IF_name, VLAN_ID);
+#endif
+
+	if (VLAN_ID >= VLAN_VID_MASK)
+		goto out_ret_null;
+
+	if (vlan_check_real_dev(real_dev, VLAN_ID) < 0)
+		goto out_ret_null;
+
 	/* Gotta set up the fields for the device. */
 #ifdef VLAN_DEBUG
 	printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n",
-- 
cgit v0.10.2


From e89fe42cd03c8fd3686df82d8390a235717a66de Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:06:29 -0700
Subject: [VLAN]: Move device registration to separate function

Move device registration and configuration of the underlying device to a
separate function.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 1e33dbb..e68b503 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -447,6 +447,51 @@ static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_
 	return 0;
 }
 
+static int register_vlan_dev(struct net_device *dev)
+{
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct net_device *real_dev = vlan->real_dev;
+	unsigned short vlan_id = vlan->vlan_id;
+	struct vlan_group *grp, *ngrp = NULL;
+	int err;
+
+	grp = __vlan_find_group(real_dev->ifindex);
+	if (!grp) {
+		ngrp = grp = vlan_group_alloc(real_dev->ifindex);
+		if (!grp)
+			return -ENOBUFS;
+	}
+
+	err = register_netdevice(dev);
+	if (err < 0)
+		goto out_free_group;
+
+	/* Account for reference in struct vlan_dev_info */
+	dev_hold(real_dev);
+
+	vlan_transfer_operstate(real_dev, dev);
+	linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
+
+	/* So, got the sucker initialized, now lets place
+	 * it into our local structure.
+	 */
+	vlan_group_set_device(grp, vlan_id, dev);
+	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
+		real_dev->vlan_rx_register(real_dev, ngrp);
+	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+		real_dev->vlan_rx_add_vid(real_dev, vlan_id);
+
+	if (vlan_proc_add_dev(dev) < 0)
+		printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
+		       dev->name);
+	return 0;
+
+out_free_group:
+	if (ngrp)
+		vlan_group_free(ngrp);
+	return err;
+}
+
 /*  Attach a VLAN device to a mac address (ie Ethernet Card).
  *  Returns the device that was created, or NULL if there was
  *  an error of some kind.
@@ -454,7 +499,6 @@ static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_
 static struct net_device *register_vlan_device(struct net_device *real_dev,
 					       unsigned short VLAN_ID)
 {
-	struct vlan_group *grp, *ngrp = NULL;
 	struct net_device *new_dev;
 	char name[IFNAMSIZ];
 
@@ -522,37 +566,8 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 	VLAN_DEV_INFO(new_dev)->dent = NULL;
 	VLAN_DEV_INFO(new_dev)->flags = 1;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "About to go find the group for idx: %i\n",
-	       real_dev->ifindex);
-#endif
-	grp = __vlan_find_group(real_dev->ifindex);
-	if (!grp) {
-		ngrp = grp = vlan_group_alloc(real_dev->ifindex);
-		if (!grp)
-			goto out_free_newdev;
-	}
-
-	if (register_netdevice(new_dev))
-		goto out_free_group;
-
-	vlan_transfer_operstate(real_dev, new_dev);
-	linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
-
-	/* So, got the sucker initialized, now lets place
-	 * it into our local structure.
-	 */
-	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
-		real_dev->vlan_rx_register(real_dev, ngrp);
-
-	vlan_group_set_device(grp, VLAN_ID, new_dev);
-
-	if (vlan_proc_add_dev(new_dev)<0)/* create it's proc entry */
-		printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
-							 new_dev->name);
-
-	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
-		real_dev->vlan_rx_add_vid(real_dev, VLAN_ID);
+	if (register_vlan_dev(new_dev) < 0)
+		goto out_free_newdev;
 
 	/* Account for reference in struct vlan_dev_info */
 	dev_hold(real_dev);
@@ -561,10 +576,6 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 #endif
 	return new_dev;
 
-out_free_group:
-	if (ngrp)
-		vlan_group_free(ngrp);
-
 out_free_newdev:
 	free_netdev(new_dev);
 
-- 
cgit v0.10.2


From 2ae0bf69b716d07126f0a9c17fcc2d76da172cb6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:06:43 -0700
Subject: [VLAN]: Return proper error codes in register_vlan_device

The returned device is unused, return proper error codes instead and avoid
having the ioctl handler guess the error.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index e68b503..5801993 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -493,14 +493,14 @@ out_free_group:
 }
 
 /*  Attach a VLAN device to a mac address (ie Ethernet Card).
- *  Returns the device that was created, or NULL if there was
- *  an error of some kind.
+ *  Returns 0 if the device was created or a negative error code otherwise.
  */
-static struct net_device *register_vlan_device(struct net_device *real_dev,
-					       unsigned short VLAN_ID)
+static int register_vlan_device(struct net_device *real_dev,
+				unsigned short VLAN_ID)
 {
 	struct net_device *new_dev;
 	char name[IFNAMSIZ];
+	int err;
 
 #ifdef VLAN_DEBUG
 	printk(VLAN_DBG "%s: if_name -:%s:-	vid: %i\n",
@@ -508,10 +508,11 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 #endif
 
 	if (VLAN_ID >= VLAN_VID_MASK)
-		goto out_ret_null;
+		return -ERANGE;
 
-	if (vlan_check_real_dev(real_dev, VLAN_ID) < 0)
-		goto out_ret_null;
+	err = vlan_check_real_dev(real_dev, VLAN_ID);
+	if (err < 0)
+		return err;
 
 	/* Gotta set up the fields for the device. */
 #ifdef VLAN_DEBUG
@@ -547,7 +548,7 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 			       vlan_setup);
 
 	if (new_dev == NULL)
-		goto out_ret_null;
+		return -ENOBUFS;
 
 	/* need 4 bytes for extra VLAN header info,
 	 * hope the underlying device can handle it.
@@ -566,7 +567,8 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 	VLAN_DEV_INFO(new_dev)->dent = NULL;
 	VLAN_DEV_INFO(new_dev)->flags = 1;
 
-	if (register_vlan_dev(new_dev) < 0)
+	err = register_vlan_dev(new_dev);
+	if (err < 0)
 		goto out_free_newdev;
 
 	/* Account for reference in struct vlan_dev_info */
@@ -574,13 +576,11 @@ static struct net_device *register_vlan_device(struct net_device *real_dev,
 #ifdef VLAN_DEBUG
 	printk(VLAN_DBG "Allocated new device successfully, returning.\n");
 #endif
-	return new_dev;
+	return 0;
 
 out_free_newdev:
 	free_netdev(new_dev);
-
-out_ret_null:
-	return NULL;
+	return err;
 }
 
 static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
@@ -753,11 +753,7 @@ static int vlan_ioctl_handler(void __user *arg)
 		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			break;
-		if (register_vlan_device(dev, args.u.VID)) {
-			err = 0;
-		} else {
-			err = -EINVAL;
-		}
+		err = register_vlan_device(dev, args.u.VID);
 		break;
 
 	case DEL_VLAN_CMD:
-- 
cgit v0.10.2


From 734423cf38021966a5d3bd5f5c6aaecaf32fb4ac Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:07:07 -0700
Subject: [VLAN]: Use 32 bit value for skb->priority mapping

skb->priority has only 32 bits and even VLAN uses 32 bit values in its API.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 81e9bc9..aeddb49 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -99,7 +99,7 @@ static inline void vlan_group_set_device(struct vlan_group *vg, int vlan_id,
 }
 
 struct vlan_priority_tci_mapping {
-	unsigned long priority;
+	u32 priority;
 	unsigned short vlan_qos; /* This should be shifted when first set, so we only do it
 				  * at provisioning time.
 				  * ((skb->priority << 13) & 0xE000)
@@ -112,7 +112,7 @@ struct vlan_dev_info {
 	/** This will be the mapping that correlates skb->priority to
 	 * 3 bits of VLAN QOS tags...
 	 */
-	unsigned long ingress_priority_map[8];
+	u32 ingress_priority_map[8];
 	struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */
 
 	unsigned short vlan_id;        /*  The VLAN Identifier for this interface. */
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d216a64..8693b21 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -342,7 +342,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
 	/* now show all PRIORITY mappings relating to this VLAN */
 	seq_printf(seq,
-		       "\nINGRESS priority mappings: 0:%lu  1:%lu  2:%lu  3:%lu  4:%lu  5:%lu  6:%lu 7:%lu\n",
+		       "\nINGRESS priority mappings: 0:%u  1:%u  2:%u  3:%u  4:%u  5:%u  6:%u 7:%u\n",
 		       dev_info->ingress_priority_map[0],
 		       dev_info->ingress_priority_map[1],
 		       dev_info->ingress_priority_map[2],
@@ -357,7 +357,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 		const struct vlan_priority_tci_mapping *mp
 			= dev_info->egress_priority_map[i];
 		while (mp) {
-			seq_printf(seq, "%lu:%hu ",
+			seq_printf(seq, "%u:%hu ",
 				   mp->priority, ((mp->vlan_qos >> 13) & 0x7));
 			mp = mp->next;
 		}
-- 
cgit v0.10.2


From b020cb488586f982f40eb257a32e92a4de710d65 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:07:22 -0700
Subject: [VLAN]: Keep track of number of QoS mappings

Keep track of the number of configured ingress/egress QoS mappings to
avoid iteration while calculating the netlink attribute size.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index aeddb49..b46d422 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -112,7 +112,10 @@ struct vlan_dev_info {
 	/** This will be the mapping that correlates skb->priority to
 	 * 3 bits of VLAN QOS tags...
 	 */
+	unsigned int nr_ingress_mappings;
 	u32 ingress_priority_map[8];
+
+	unsigned int nr_egress_mappings;
 	struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */
 
 	unsigned short vlan_id;        /*  The VLAN Identifier for this interface. */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 05a2360..4f6ede7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -537,35 +537,50 @@ int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, short vlan_prio)
 {
-	VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+
+	if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
+		vlan->nr_ingress_mappings--;
+	else if (!vlan->ingress_priority_map[vlan_prio & 0x7] && skb_prio)
+		vlan->nr_ingress_mappings++;
+
+	vlan->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
 }
 
 int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, short vlan_prio)
 {
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
 	struct vlan_priority_tci_mapping *mp = NULL;
 	struct vlan_priority_tci_mapping *np;
+	u32 vlan_qos = (vlan_prio << 13) & 0xE000;
 
 	/* See if a priority mapping exists.. */
-	mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
+	mp = vlan->egress_priority_map[skb_prio & 0xF];
 	while (mp) {
 		if (mp->priority == skb_prio) {
-			mp->vlan_qos = ((vlan_prio << 13) & 0xE000);
+			if (mp->vlan_qos && !vlan_qos)
+				vlan->nr_egress_mappings--;
+			else if (!mp->vlan_qos && vlan_qos)
+				vlan->nr_egress_mappings++;
+			mp->vlan_qos = vlan_qos;
 			return 0;
 		}
 		mp = mp->next;
 	}
 
 	/* Create a new mapping then. */
-	mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
+	mp = vlan->egress_priority_map[skb_prio & 0xF];
 	np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL);
 	if (!np)
 		return -ENOBUFS;
 
 	np->next = mp;
 	np->priority = skb_prio;
-	np->vlan_qos = ((vlan_prio << 13) & 0xE000);
-	VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np;
+	np->vlan_qos = vlan_qos;
+	vlan->egress_priority_map[skb_prio & 0xF] = np;
+	if (vlan_qos)
+		vlan->nr_egress_mappings++;
 	return 0;
 }
 
-- 
cgit v0.10.2


From a4bf3af4ac46802436d352ef409cee4fe80445b3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:07:37 -0700
Subject: [VLAN]: Introduce symbolic constants for flag values

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index b46d422..c791287 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -398,6 +398,10 @@ enum vlan_ioctl_cmds {
 	GET_VLAN_VID_CMD /* Get the VID of this VLAN (specified by name) */
 };
 
+enum vlan_flags {
+	VLAN_FLAG_REORDER_HDR	= 0x1,
+};
+
 enum vlan_name_types {
 	VLAN_NAME_TYPE_PLUS_VID, /* Name will look like:  vlan0005 */
 	VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like:  eth1.0005 */
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5801993..f12f914 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -565,7 +565,7 @@ static int register_vlan_device(struct net_device *real_dev,
 	VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
 	VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
 	VLAN_DEV_INFO(new_dev)->dent = NULL;
-	VLAN_DEV_INFO(new_dev)->flags = 1;
+	VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
 
 	err = register_vlan_dev(new_dev);
 	if (err < 0)
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4f6ede7..95afe38 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -73,7 +73,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
 
 static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 {
-	if (VLAN_DEV_INFO(skb->dev)->flags & 1) {
+	if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
 		if (skb_shared(skb) || skb_cloned(skb)) {
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			kfree_skb(skb);
@@ -350,7 +350,8 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	 * header shuffling in the hard_start_xmit.  Users can turn off this
 	 * REORDER behaviour with the vconfig tool.
 	 */
-	build_vlan_header = ((VLAN_DEV_INFO(dev)->flags & 1) == 0);
+	if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR))
+		build_vlan_header = 1;
 
 	if (build_vlan_header) {
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
@@ -584,16 +585,16 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 	return 0;
 }
 
-/* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */
+/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
 int vlan_dev_set_vlan_flag(const struct net_device *dev,
 			   u32 flag, short flag_val)
 {
 	/* verify flag is supported */
-	if (flag == 1) {
+	if (flag == VLAN_FLAG_REORDER_HDR) {
 		if (flag_val) {
-			VLAN_DEV_INFO(dev)->flags |= 1;
+			VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR;
 		} else {
-			VLAN_DEV_INFO(dev)->flags &= ~1;
+			VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
 		}
 		return 0;
 	}
-- 
cgit v0.10.2


From 07b5b17e157b7018d0ca40ca0d1581a23096fb45 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:07:54 -0700
Subject: [VLAN]: Use rtnl_link API

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 3144bab..422084d 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -153,4 +153,38 @@ enum
 
 #define IFLA_INFO_MAX	(__IFLA_INFO_MAX - 1)
 
+/* VLAN section */
+
+enum
+{
+	IFLA_VLAN_UNSPEC,
+	IFLA_VLAN_ID,
+	IFLA_VLAN_FLAGS,
+	IFLA_VLAN_EGRESS_QOS,
+	IFLA_VLAN_INGRESS_QOS,
+	__IFLA_VLAN_MAX,
+};
+
+#define IFLA_VLAN_MAX	(__IFLA_VLAN_MAX - 1)
+
+struct ifla_vlan_flags {
+	__u32	flags;
+	__u32	mask;
+};
+
+enum
+{
+	IFLA_VLAN_QOS_UNSPEC,
+	IFLA_VLAN_QOS_MAPPING,
+	__IFLA_VLAN_QOS_MAX
+};
+
+#define IFLA_VLAN_QOS_MAX	(__IFLA_VLAN_QOS_MAX - 1)
+
+struct ifla_vlan_qos_mapping
+{
+	__u32 from;
+	__u32 to;
+};
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 97feb44..10ca7f4 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_VLAN_8021Q) += 8021q.o
 
-8021q-objs := vlan.o vlan_dev.o
+8021q-objs := vlan.o vlan_dev.o vlan_netlink.o
 
 ifeq ($(CONFIG_PROC_FS),y)
 8021q-objs += vlanproc.o
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index f12f914..e7583ee 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -97,15 +97,22 @@ static int __init vlan_proto_init(void)
 
 	/* Register us to receive netdevice events */
 	err = register_netdevice_notifier(&vlan_notifier_block);
-	if (err < 0) {
-		dev_remove_pack(&vlan_packet_type);
-		vlan_proc_cleanup();
-		return err;
-	}
+	if (err < 0)
+		goto err1;
 
-	vlan_ioctl_set(vlan_ioctl_handler);
+	err = vlan_netlink_init();
+	if (err < 0)
+		goto err2;
 
+	vlan_ioctl_set(vlan_ioctl_handler);
 	return 0;
+
+err2:
+	unregister_netdevice_notifier(&vlan_notifier_block);
+err1:
+	vlan_proc_cleanup();
+	dev_remove_pack(&vlan_packet_type);
+	return err;
 }
 
 /* Cleanup all vlan devices
@@ -136,6 +143,7 @@ static void __exit vlan_cleanup_module(void)
 {
 	int i;
 
+	vlan_netlink_fini();
 	vlan_ioctl_set(NULL);
 
 	/* Un-register us from receiving netdevice events */
@@ -306,7 +314,7 @@ static int unregister_vlan_dev(struct net_device *real_dev,
 	return ret;
 }
 
-static int unregister_vlan_device(struct net_device *dev)
+int unregister_vlan_device(struct net_device *dev)
 {
 	int ret;
 
@@ -361,7 +369,7 @@ static int vlan_dev_init(struct net_device *dev)
 	return 0;
 }
 
-static void vlan_setup(struct net_device *new_dev)
+void vlan_setup(struct net_device *new_dev)
 {
 	SET_MODULE_OWNER(new_dev);
 
@@ -410,7 +418,7 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
 	}
 }
 
-static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
 {
 	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
 		printk(VLAN_DBG "%s: VLANs not supported on %s.\n",
@@ -447,7 +455,7 @@ static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_
 	return 0;
 }
 
-static int register_vlan_dev(struct net_device *dev)
+int register_vlan_dev(struct net_device *dev)
 {
 	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
 	struct net_device *real_dev = vlan->real_dev;
@@ -567,6 +575,7 @@ static int register_vlan_device(struct net_device *real_dev,
 	VLAN_DEV_INFO(new_dev)->dent = NULL;
 	VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
 
+	new_dev->rtnl_link_ops = &vlan_link_ops;
 	err = register_vlan_dev(new_dev);
 	if (err < 0)
 		goto out_free_newdev;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index b837390..fe6bb0f 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -72,4 +72,14 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
 void vlan_dev_set_multicast_list(struct net_device *vlan_dev);
 
+int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
+void vlan_setup(struct net_device *dev);
+int register_vlan_dev(struct net_device *dev);
+int unregister_vlan_device(struct net_device *dev);
+
+int vlan_netlink_init(void);
+void vlan_netlink_fini(void);
+
+extern struct rtnl_link_ops vlan_link_ops;
+
 #endif /* !(__BEN_VLAN_802_1Q_INC__) */
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
new file mode 100644
index 0000000..844c7e4
--- /dev/null
+++ b/net/8021q/vlan_netlink.c
@@ -0,0 +1,236 @@
+/*
+ *	VLAN netlink control interface
+ *
+ * 	Copyright (c) 2007 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <net/netlink.h>
+#include <net/rtnetlink.h>
+#include "vlan.h"
+
+
+static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
+	[IFLA_VLAN_ID]		= { .type = NLA_U16 },
+	[IFLA_VLAN_FLAGS]	= { .len = sizeof(struct ifla_vlan_flags) },
+	[IFLA_VLAN_EGRESS_QOS]	= { .type = NLA_NESTED },
+	[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = {
+	[IFLA_VLAN_QOS_MAPPING] = { .len = sizeof(struct ifla_vlan_qos_mapping) },
+};
+
+
+static inline int vlan_validate_qos_map(struct nlattr *attr)
+{
+	if (!attr)
+		return 0;
+	return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy);
+}
+
+static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	struct ifla_vlan_flags *flags;
+	u16 id;
+	int err;
+
+	if (!data)
+		return -EINVAL;
+
+	if (data[IFLA_VLAN_ID]) {
+		id = nla_get_u16(data[IFLA_VLAN_ID]);
+		if (id >= VLAN_VID_MASK)
+			return -ERANGE;
+	}
+	if (data[IFLA_VLAN_FLAGS]) {
+		flags = nla_data(data[IFLA_VLAN_FLAGS]);
+		if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR)
+			return -EINVAL;
+	}
+
+	err = vlan_validate_qos_map(data[IFLA_VLAN_INGRESS_QOS]);
+	if (err < 0)
+		return err;
+	err = vlan_validate_qos_map(data[IFLA_VLAN_EGRESS_QOS]);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
+static int vlan_changelink(struct net_device *dev,
+			   struct nlattr *tb[], struct nlattr *data[])
+{
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct ifla_vlan_flags *flags;
+	struct ifla_vlan_qos_mapping *m;
+	struct nlattr *attr;
+	int rem;
+
+	if (data[IFLA_VLAN_FLAGS]) {
+		flags = nla_data(data[IFLA_VLAN_FLAGS]);
+		vlan->flags = (vlan->flags & ~flags->mask) |
+			      (flags->flags & flags->mask);
+	}
+	if (data[IFLA_VLAN_INGRESS_QOS]) {
+		nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
+			m = nla_data(attr);
+			vlan_dev_set_ingress_priority(dev, m->to, m->from);
+		}
+	}
+	if (data[IFLA_VLAN_EGRESS_QOS]) {
+		nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
+			m = nla_data(attr);
+			vlan_dev_set_egress_priority(dev, m->from, m->to);
+		}
+	}
+	return 0;
+}
+
+static int vlan_newlink(struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[])
+{
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct net_device *real_dev;
+	int err;
+
+	if (!data[IFLA_VLAN_ID])
+		return -EINVAL;
+
+	if (!tb[IFLA_LINK])
+		return -EINVAL;
+	real_dev = __dev_get_by_index(nla_get_u32(tb[IFLA_LINK]));
+	if (!real_dev)
+		return -ENODEV;
+
+	vlan->vlan_id  = nla_get_u16(data[IFLA_VLAN_ID]);
+	vlan->real_dev = real_dev;
+	vlan->flags    = VLAN_FLAG_REORDER_HDR;
+
+	err = vlan_check_real_dev(real_dev, vlan->vlan_id);
+	if (err < 0)
+		return err;
+
+	if (!tb[IFLA_MTU])
+		dev->mtu = real_dev->mtu;
+	else if (dev->mtu > real_dev->mtu)
+		return -EINVAL;
+
+	err = vlan_changelink(dev, tb, data);
+	if (err < 0)
+		return err;
+
+	return register_vlan_dev(dev);
+}
+
+static void vlan_dellink(struct net_device *dev)
+{
+	unregister_vlan_device(dev);
+}
+
+static inline size_t vlan_qos_map_size(unsigned int n)
+{
+	if (n == 0)
+		return 0;
+	/* IFLA_VLAN_{EGRESS,INGRESS}_QOS + n * IFLA_VLAN_QOS_MAPPING */
+	return nla_total_size(sizeof(struct nlattr)) +
+	       nla_total_size(sizeof(struct ifla_vlan_qos_mapping)) * n;
+}
+
+static size_t vlan_get_size(const struct net_device *dev)
+{
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+
+	return nla_total_size(2) +	/* IFLA_VLAN_ID */
+	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
+	       vlan_qos_map_size(vlan->nr_egress_mappings);
+}
+
+static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_priority_tci_mapping *pm;
+	struct ifla_vlan_flags f;
+	struct ifla_vlan_qos_mapping m;
+	struct nlattr *nest;
+	unsigned int i;
+
+	NLA_PUT_U16(skb, IFLA_VLAN_ID, VLAN_DEV_INFO(dev)->vlan_id);
+	if (vlan->flags) {
+		f.flags = vlan->flags;
+		f.mask  = ~0;
+		NLA_PUT(skb, IFLA_VLAN_FLAGS, sizeof(f), &f);
+	}
+	if (vlan->nr_ingress_mappings) {
+		nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS);
+		if (nest == NULL)
+			goto nla_put_failure;
+
+		for (i = 0; i < ARRAY_SIZE(vlan->ingress_priority_map); i++) {
+			if (!vlan->ingress_priority_map[i])
+				continue;
+
+			m.from = i;
+			m.to   = vlan->ingress_priority_map[i];
+			NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING,
+				sizeof(m), &m);
+		}
+		nla_nest_end(skb, nest);
+	}
+
+	if (vlan->nr_egress_mappings) {
+		nest = nla_nest_start(skb, IFLA_VLAN_EGRESS_QOS);
+		if (nest == NULL)
+			goto nla_put_failure;
+
+		for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
+			for (pm = vlan->egress_priority_map[i]; pm;
+			     pm = pm->next) {
+				if (!pm->vlan_qos)
+					continue;
+
+				m.from = pm->priority;
+				m.to   = (pm->vlan_qos >> 13) & 0x7;
+				NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING,
+					sizeof(m), &m);
+			}
+		}
+		nla_nest_end(skb, nest);
+	}
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+struct rtnl_link_ops vlan_link_ops __read_mostly = {
+	.kind		= "vlan",
+	.maxtype	= IFLA_VLAN_MAX,
+	.policy		= vlan_policy,
+	.priv_size	= sizeof(struct vlan_dev_info),
+	.setup		= vlan_setup,
+	.validate	= vlan_validate,
+	.newlink	= vlan_newlink,
+	.changelink	= vlan_changelink,
+	.dellink	= vlan_dellink,
+	.get_size	= vlan_get_size,
+	.fill_info	= vlan_fill_info,
+};
+
+int __init vlan_netlink_init(void)
+{
+	return rtnl_link_register(&vlan_link_ops);
+}
+
+void __exit vlan_netlink_fini(void)
+{
+	rtnl_link_unregister(&vlan_link_ops);
+}
+
+MODULE_ALIAS_RTNL_LINK("vlan");
-- 
cgit v0.10.2


From b2f41ff4139c0df8d06f352acc962a62fc07a0c3 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Mon, 28 May 2007 12:23:29 -0300
Subject: ccid3: Update copyrights

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index ec7fa4d..2d203ae 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1,8 +1,8 @@
 /*
  *  net/dccp/ccids/ccid3.c
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
+ *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *
  *  An implementation of the DCCP protocol
  *
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 372d7e7..3829afc 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -1,8 +1,8 @@
 /*
  *  net/dccp/ccids/lib/loss_interval.c
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
+ *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *
  *  This program is free software; you can redistribute it and/or modify
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index eb25701..1e48fe3 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -3,8 +3,8 @@
 /*
  *  net/dccp/ccids/lib/loss_interval.h
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
+ *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *
  *  This program is free software; you can redistribute it and/or modify it
-- 
cgit v0.10.2


From e961811fcde4202ae5c3c9ce81dcfc244e8959bb Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Mon, 28 May 2007 16:32:26 -0300
Subject: Fix dccp_csum_coverage

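When compiling with EXTRA_CFLAGS=-W, notice that we have a signed/unsigned
issue in dccp.h. Make dccp_csum_coverage() return unsigned int and store its
result in an unsigned int in dccp_csum_outgoing().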

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index d8ad27b..e2d74cd 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -184,7 +184,7 @@ DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
 /*
  * 	Checksumming routines
  */
-static inline int dccp_csum_coverage(const struct sk_buff *skb)
+static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb)
 {
 	const struct dccp_hdr* dh = dccp_hdr(skb);
 
@@ -195,7 +195,7 @@ static inline int dccp_csum_coverage(const struct sk_buff *skb)
 
 static inline void dccp_csum_outgoing(struct sk_buff *skb)
 {
-	int cov = dccp_csum_coverage(skb);
+	unsigned int cov = dccp_csum_coverage(skb);
 
 	if (cov >= skb->len)
 		dccp_hdr(skb)->dccph_cscov = 0;
-- 
cgit v0.10.2


From 6bc7efe8efa627077f8f65d01dbb762fc9356a2f Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Mon, 28 May 2007 16:37:45 -0300
Subject: loss_interval: Fix timeval initialisation

When compiling with EXTRA_CFLAGS=-W, noticed that tstamp is not initialised
correctly in ccid3_hc_rx_calc_first_li.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 2d203ae..9686a8d 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -829,7 +829,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
 	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
 	u32 x_recv, p;
 	suseconds_t rtt, delta;
-	struct timeval tstamp = { 0, };
+	struct timeval tstamp = { 0, 0 };
 	int interval = 0;
 	int win_count = 0;
 	int step = 0;
-- 
cgit v0.10.2


From d83258a3da1d3c7ae7b75549c8bf7ed689562c62 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 May 2007 18:04:14 -0300
Subject: Remove accesses to ccid3_hc_rx_sock in
 ccid3_hc_rx_{update,calc_first}_li

This is a preparatory patch for moving these loss interval functions from
net/dccp/ccids/ccid3.c to net/dccp/ccids/lib/loss_interval.c.

Based on a patch by Ian McDonald.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 9686a8d..fb500d3 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -823,9 +823,12 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
  *
  * returns estimated loss interval in usecs */
 
-static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
+static u32 ccid3_hc_rx_calc_first_li(struct sock *sk,
+				     struct list_head *hist_list,
+				     struct timeval *last_feedback,
+				     u16 s, u32 bytes_recv,
+				     u32 previous_x_recv)
 {
-	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
 	u32 x_recv, p;
 	suseconds_t rtt, delta;
@@ -835,8 +838,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
 	int step = 0;
 	u64 fval;
 
-	list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
-				 dccphrx_node) {
+	list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) {
 		if (dccp_rx_hist_entry_data_packet(entry)) {
 			tail = entry;
 
@@ -895,19 +897,20 @@ found:
 	}
 
 	dccp_timestamp(sk, &tstamp);
-	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
+	delta = timeval_delta(&tstamp, last_feedback);
 	DCCP_BUG_ON(delta <= 0);
 
-	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+	x_recv = scaled_div32(bytes_recv, delta);
 	if (x_recv == 0) {		/* would also trigger divide-by-zero */
 		DCCP_WARN("X_recv==0\n");
-		if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
+		if (previous_x_recv == 0) {
 			DCCP_BUG("stored value of X_recv is zero");
 			return ~0;
 		}
+		x_recv = previous_x_recv;
 	}
 
-	fval = scaled_div(hcrx->ccid3hcrx_s, rtt);
+	fval = scaled_div(s, rtt);
 	fval = scaled_div32(fval, x_recv);
 	p = tfrc_calc_x_reverse_lookup(fval);
 
@@ -920,26 +923,36 @@ found:
 		return 1000000 / p;
 }
 
-static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
+static void ccid3_hc_rx_update_li(struct sock *sk,
+				  struct list_head *li_hist_list,
+				  struct list_head *hist_list,
+				  struct timeval *last_feedback,
+				  u16 s, u32 bytes_recv,
+				  u32 previous_x_recv,
+				  u64 seq_loss, u8 win_loss)
 {
-	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_li_hist_entry *head;
 	u64 seq_temp;
 
-	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
+	if (list_empty(li_hist_list)) {
 		if (!dccp_li_hist_interval_new(ccid3_li_hist,
-		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
+					       li_hist_list, seq_loss,
+					       win_loss))
 			return;
 
-		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
-		   struct dccp_li_hist_entry, dccplih_node);
-		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
+				  dccplih_node);
+		head->dccplih_interval =
+				ccid3_hc_rx_calc_first_li(sk, hist_list,
+							  last_feedback, s,
+							  bytes_recv,
+							  previous_x_recv);
 	} else {
 		struct dccp_li_hist_entry *entry;
 		struct list_head *tail;
 
-		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
-		   struct dccp_li_hist_entry, dccplih_node);
+		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
+				  dccplih_node);
 		/* FIXME win count check removed as was wrong */
 		/* should make this check with receive history */
 		/* and compare there as per section 10.2 of RFC4342 */
@@ -954,9 +967,9 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
 			return;
 		}
 
-		list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);
+		list_add(&entry->dccplih_node, li_hist_list);
 
-		tail = hcrx->ccid3hcrx_li_hist.prev;
+		tail = li_hist_list->prev;
 		list_del(tail);
 		kmem_cache_free(ccid3_li_hist->dccplih_slab, tail);
 
@@ -992,8 +1005,15 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
 	   > TFRC_RECV_NUM_LATE_LOSS) {
 		loss = 1;
-		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
-		   hcrx->ccid3hcrx_ccval_nonloss);
+		ccid3_hc_rx_update_li(sk,
+				      &hcrx->ccid3hcrx_li_hist,
+				      &hcrx->ccid3hcrx_hist,
+				      &hcrx->ccid3hcrx_tstamp_last_feedback,
+				      hcrx->ccid3hcrx_s,
+				      hcrx->ccid3hcrx_bytes_recv,
+				      hcrx->ccid3hcrx_x_recv,
+				      hcrx->ccid3hcrx_seqno_nonloss,
+				      hcrx->ccid3hcrx_ccval_nonloss);
 		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
 		dccp_inc_seqno(&tmp_seqno);
 		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
-- 
cgit v0.10.2


From 878ac60023c4ba11a7fbf0b1dfe07b8472c0d6ce Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jun 2007 12:24:46 -0300
Subject: [CCID3]: Pass ccid3_li_hist to ccid3_hc_rx_update_li

Now ccid3_hc_rx_update_li is ready to be moved to
net/dccp/ccids/lib/loss_interval, as it uses the same interface as the other
functions there.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index fb500d3..52a71a9 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -924,6 +924,7 @@ found:
 }
 
 static void ccid3_hc_rx_update_li(struct sock *sk,
+				  struct dccp_li_hist *li_hist,
 				  struct list_head *li_hist_list,
 				  struct list_head *hist_list,
 				  struct timeval *last_feedback,
@@ -935,9 +936,8 @@ static void ccid3_hc_rx_update_li(struct sock *sk,
 	u64 seq_temp;
 
 	if (list_empty(li_hist_list)) {
-		if (!dccp_li_hist_interval_new(ccid3_li_hist,
-					       li_hist_list, seq_loss,
-					       win_loss))
+		if (!dccp_li_hist_interval_new(li_hist, li_hist_list,
+					       seq_loss, win_loss))
 			return;
 
 		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
@@ -960,7 +960,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk,
 		/* new loss event detected */
 		/* calculate last interval length */
 		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
-		entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC);
+		entry = dccp_li_hist_entry_new(li_hist, GFP_ATOMIC);
 
 		if (entry == NULL) {
 			DCCP_BUG("out of memory - can not allocate entry");
@@ -971,7 +971,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk,
 
 		tail = li_hist_list->prev;
 		list_del(tail);
-		kmem_cache_free(ccid3_li_hist->dccplih_slab, tail);
+		kmem_cache_free(li_hist->dccplih_slab, tail);
 
 		/* Create the newest interval */
 		entry->dccplih_seqno = seq_loss;
@@ -1005,7 +1005,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
 	   > TFRC_RECV_NUM_LATE_LOSS) {
 		loss = 1;
-		ccid3_hc_rx_update_li(sk,
+		ccid3_hc_rx_update_li(sk, ccid3_li_hist,
 				      &hcrx->ccid3hcrx_li_hist,
 				      &hcrx->ccid3hcrx_hist,
 				      &hcrx->ccid3hcrx_tstamp_last_feedback,
-- 
cgit v0.10.2


From cc0a910b942d11069d35f52b2c0ed0e229e2fb46 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jun 2007 17:41:28 -0300
Subject: [DCCP] loss_interval: Move ccid3_hc_rx_update_li to loss_interval

Renaming it to dccp_li_update_li.

Also based on previous work by Ian McDonald.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 52a71a9..9d2e2c1 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -819,167 +819,6 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-/* calculate first loss interval
- *
- * returns estimated loss interval in usecs */
-
-static u32 ccid3_hc_rx_calc_first_li(struct sock *sk,
-				     struct list_head *hist_list,
-				     struct timeval *last_feedback,
-				     u16 s, u32 bytes_recv,
-				     u32 previous_x_recv)
-{
-	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
-	u32 x_recv, p;
-	suseconds_t rtt, delta;
-	struct timeval tstamp = { 0, 0 };
-	int interval = 0;
-	int win_count = 0;
-	int step = 0;
-	u64 fval;
-
-	list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) {
-		if (dccp_rx_hist_entry_data_packet(entry)) {
-			tail = entry;
-
-			switch (step) {
-			case 0:
-				tstamp	  = entry->dccphrx_tstamp;
-				win_count = entry->dccphrx_ccval;
-				step = 1;
-				break;
-			case 1:
-				interval = win_count - entry->dccphrx_ccval;
-				if (interval < 0)
-					interval += TFRC_WIN_COUNT_LIMIT;
-				if (interval > 4)
-					goto found;
-				break;
-			}
-		}
-	}
-
-	if (unlikely(step == 0)) {
-		DCCP_WARN("%s(%p), packet history has no data packets!\n",
-			  dccp_role(sk), sk);
-		return ~0;
-	}
-
-	if (unlikely(interval == 0)) {
-		DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
-			  "Defaulting to 1\n", dccp_role(sk), sk);
-		interval = 1;
-	}
-found:
-	if (!tail) {
-		DCCP_CRIT("tail is null\n");
-		return ~0;
-	}
-
-	delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
-	DCCP_BUG_ON(delta < 0);
-
-	rtt = delta * 4 / interval;
-	ccid3_pr_debug("%s(%p), approximated RTT to %dus\n",
-		       dccp_role(sk), sk, (int)rtt);
-
-	/*
-	 * Determine the length of the first loss interval via inverse lookup.
-	 * Assume that X_recv can be computed by the throughput equation
-	 *		    s
-	 *	X_recv = --------
-	 *		 R * fval
-	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
-	 */
-	if (rtt == 0) {			/* would result in divide-by-zero */
-		DCCP_WARN("RTT==0\n");
-		return ~0;
-	}
-
-	dccp_timestamp(sk, &tstamp);
-	delta = timeval_delta(&tstamp, last_feedback);
-	DCCP_BUG_ON(delta <= 0);
-
-	x_recv = scaled_div32(bytes_recv, delta);
-	if (x_recv == 0) {		/* would also trigger divide-by-zero */
-		DCCP_WARN("X_recv==0\n");
-		if (previous_x_recv == 0) {
-			DCCP_BUG("stored value of X_recv is zero");
-			return ~0;
-		}
-		x_recv = previous_x_recv;
-	}
-
-	fval = scaled_div(s, rtt);
-	fval = scaled_div32(fval, x_recv);
-	p = tfrc_calc_x_reverse_lookup(fval);
-
-	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
-		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
-
-	if (p == 0)
-		return ~0;
-	else
-		return 1000000 / p;
-}
-
-static void ccid3_hc_rx_update_li(struct sock *sk,
-				  struct dccp_li_hist *li_hist,
-				  struct list_head *li_hist_list,
-				  struct list_head *hist_list,
-				  struct timeval *last_feedback,
-				  u16 s, u32 bytes_recv,
-				  u32 previous_x_recv,
-				  u64 seq_loss, u8 win_loss)
-{
-	struct dccp_li_hist_entry *head;
-	u64 seq_temp;
-
-	if (list_empty(li_hist_list)) {
-		if (!dccp_li_hist_interval_new(li_hist, li_hist_list,
-					       seq_loss, win_loss))
-			return;
-
-		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
-				  dccplih_node);
-		head->dccplih_interval =
-				ccid3_hc_rx_calc_first_li(sk, hist_list,
-							  last_feedback, s,
-							  bytes_recv,
-							  previous_x_recv);
-	} else {
-		struct dccp_li_hist_entry *entry;
-		struct list_head *tail;
-
-		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
-				  dccplih_node);
-		/* FIXME win count check removed as was wrong */
-		/* should make this check with receive history */
-		/* and compare there as per section 10.2 of RFC4342 */
-
-		/* new loss event detected */
-		/* calculate last interval length */
-		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
-		entry = dccp_li_hist_entry_new(li_hist, GFP_ATOMIC);
-
-		if (entry == NULL) {
-			DCCP_BUG("out of memory - can not allocate entry");
-			return;
-		}
-
-		list_add(&entry->dccplih_node, li_hist_list);
-
-		tail = li_hist_list->prev;
-		list_del(tail);
-		kmem_cache_free(li_hist->dccplih_slab, tail);
-
-		/* Create the newest interval */
-		entry->dccplih_seqno = seq_loss;
-		entry->dccplih_interval = seq_temp;
-		entry->dccplih_win_count = win_loss;
-	}
-}
-
 static int ccid3_hc_rx_detect_loss(struct sock *sk,
 				    struct dccp_rx_hist_entry *packet)
 {
@@ -1005,15 +844,15 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
 	   > TFRC_RECV_NUM_LATE_LOSS) {
 		loss = 1;
-		ccid3_hc_rx_update_li(sk, ccid3_li_hist,
-				      &hcrx->ccid3hcrx_li_hist,
-				      &hcrx->ccid3hcrx_hist,
-				      &hcrx->ccid3hcrx_tstamp_last_feedback,
-				      hcrx->ccid3hcrx_s,
-				      hcrx->ccid3hcrx_bytes_recv,
-				      hcrx->ccid3hcrx_x_recv,
-				      hcrx->ccid3hcrx_seqno_nonloss,
-				      hcrx->ccid3hcrx_ccval_nonloss);
+		dccp_li_update_li(sk, ccid3_li_hist,
+				  &hcrx->ccid3hcrx_li_hist,
+				  &hcrx->ccid3hcrx_hist,
+				  &hcrx->ccid3hcrx_tstamp_last_feedback,
+				  hcrx->ccid3hcrx_s,
+				  hcrx->ccid3hcrx_bytes_recv,
+				  hcrx->ccid3hcrx_x_recv,
+				  hcrx->ccid3hcrx_seqno_nonloss,
+				  hcrx->ccid3hcrx_ccval_nonloss);
 		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
 		dccp_inc_seqno(&tmp_seqno);
 		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 3829afc..ee59fde 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -15,6 +15,8 @@
 #include <net/sock.h>
 #include "../../dccp.h"
 #include "loss_interval.h"
+#include "packet_history.h"
+#include "tfrc.h"
 
 struct dccp_li_hist *dccp_li_hist_new(const char *name)
 {
@@ -141,3 +143,161 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
 }
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
+
+/* calculate first loss interval
+ *
+ * returns estimated loss interval in usecs */
+static u32 dccp_li_calc_first_li(struct sock *sk,
+				 struct list_head *hist_list,
+				 struct timeval *last_feedback,
+				 u16 s, u32 bytes_recv,
+				 u32 previous_x_recv)
+{
+	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
+	u32 x_recv, p;
+	suseconds_t rtt, delta;
+	struct timeval tstamp = { 0, 0 };
+	int interval = 0;
+	int win_count = 0;
+	int step = 0;
+	u64 fval;
+
+	list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) {
+		if (dccp_rx_hist_entry_data_packet(entry)) {
+			tail = entry;
+
+			switch (step) {
+			case 0:
+				tstamp	  = entry->dccphrx_tstamp;
+				win_count = entry->dccphrx_ccval;
+				step = 1;
+				break;
+			case 1:
+				interval = win_count - entry->dccphrx_ccval;
+				if (interval < 0)
+					interval += TFRC_WIN_COUNT_LIMIT;
+				if (interval > 4)
+					goto found;
+				break;
+			}
+		}
+	}
+
+	if (unlikely(step == 0)) {
+		DCCP_WARN("%s(%p), packet history has no data packets!\n",
+			  dccp_role(sk), sk);
+		return ~0;
+	}
+
+	if (unlikely(interval == 0)) {
+		DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
+			  "Defaulting to 1\n", dccp_role(sk), sk);
+		interval = 1;
+	}
+found:
+	if (!tail) {
+		DCCP_CRIT("tail is null\n");
+		return ~0;
+	}
+
+	delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
+	DCCP_BUG_ON(delta < 0);
+
+	rtt = delta * 4 / interval;
+	dccp_pr_debug("%s(%p), approximated RTT to %dus\n",
+		      dccp_role(sk), sk, (int)rtt);
+
+	/*
+	 * Determine the length of the first loss interval via inverse lookup.
+	 * Assume that X_recv can be computed by the throughput equation
+	 *		    s
+	 *	X_recv = --------
+	 *		 R * fval
+	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
+	 */
+	if (rtt == 0) {			/* would result in divide-by-zero */
+		DCCP_WARN("RTT==0\n");
+		return ~0;
+	}
+
+	dccp_timestamp(sk, &tstamp);
+	delta = timeval_delta(&tstamp, last_feedback);
+	DCCP_BUG_ON(delta <= 0);
+
+	x_recv = scaled_div32(bytes_recv, delta);
+	if (x_recv == 0) {		/* would also trigger divide-by-zero */
+		DCCP_WARN("X_recv==0\n");
+		if (previous_x_recv == 0) {
+			DCCP_BUG("stored value of X_recv is zero");
+			return ~0;
+		}
+		x_recv = previous_x_recv;
+	}
+
+	fval = scaled_div(s, rtt);
+	fval = scaled_div32(fval, x_recv);
+	p = tfrc_calc_x_reverse_lookup(fval);
+
+	dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
+		      "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
+
+	if (p == 0)
+		return ~0;
+	else
+		return 1000000 / p;
+}
+
+void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist,
+		       struct list_head *li_hist_list,
+		       struct list_head *hist_list,
+		       struct timeval *last_feedback, u16 s, u32 bytes_recv,
+                       u32 previous_x_recv, u64 seq_loss, u8 win_loss)
+{
+	struct dccp_li_hist_entry *head;
+	u64 seq_temp;
+
+	if (list_empty(li_hist_list)) {
+		if (!dccp_li_hist_interval_new(li_hist, li_hist_list,
+					       seq_loss, win_loss))
+			return;
+
+		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
+				  dccplih_node);
+		head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list,
+							       last_feedback,
+							       s, bytes_recv,
+							       previous_x_recv);
+	} else {
+		struct dccp_li_hist_entry *entry;
+		struct list_head *tail;
+
+		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
+				  dccplih_node);
+		/* FIXME win count check removed as was wrong */
+		/* should make this check with receive history */
+		/* and compare there as per section 10.2 of RFC4342 */
+
+		/* new loss event detected */
+		/* calculate last interval length */
+		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
+		entry = dccp_li_hist_entry_new(li_hist, GFP_ATOMIC);
+
+		if (entry == NULL) {
+			DCCP_BUG("out of memory - can not allocate entry");
+			return;
+		}
+
+		list_add(&entry->dccplih_node, li_hist_list);
+
+		tail = li_hist_list->prev;
+		list_del(tail);
+		kmem_cache_free(li_hist->dccplih_slab, tail);
+
+		/* Create the newest interval */
+		entry->dccplih_seqno = seq_loss;
+		entry->dccplih_interval = seq_temp;
+		entry->dccplih_win_count = win_loss;
+	}
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_update_li);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 1e48fe3..17f173a 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -54,4 +54,11 @@ extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
 
 extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
    struct list_head *list, const u64 seq_loss, const u8 win_loss);
+
+extern void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist,
+			      struct list_head *li_hist_list,
+			      struct list_head *hist_list,
+			      struct timeval *last_feedback, u16 s,
+			      u32 bytes_recv, u32 previous_x_recv,
+			      u64 seq_loss, u8 win_loss);
 #endif /* _DCCP_LI_HIST_ */
-- 
cgit v0.10.2


From 8c281780c6f867460c84bd78d9c3885c10f00ae1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 May 2007 18:21:53 -0300
Subject: loss_interval: unexport dccp_li_hist_interval_new

Now it's only used inside the loss_interval code.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index ee59fde..8ac68c6 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -120,8 +120,9 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
 
-int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
-   struct list_head *list, const u64 seq_loss, const u8 win_loss)
+static int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+				     struct list_head *list,
+				     const u64 seq_loss, const u8 win_loss)
 {
 	struct dccp_li_hist_entry *entry;
 	int i;
@@ -142,8 +143,6 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
 	return 1;
 }
 
-EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
-
 /* calculate first loss interval
  *
  * returns estimated loss interval in usecs */
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 17f173a..653328d 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -52,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
 
 extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
 
-extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
-   struct list_head *list, const u64 seq_loss, const u8 win_loss);
-
 extern void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist,
 			      struct list_head *li_hist_list,
 			      struct list_head *hist_list,
-- 
cgit v0.10.2


From c70b729e662a1b3ee2ef5370c1e4c9bc3ddc239f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 May 2007 18:25:12 -0300
Subject: loss_interval: Make dccp_li_hist_entry_{new,delete} private

Not used outside the loss_interval code anymore.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 8ac68c6..28eac9b 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -62,6 +62,20 @@ void dccp_li_hist_delete(struct dccp_li_hist *hist)
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
 
+static inline struct dccp_li_hist_entry *
+		dccp_li_hist_entry_new(struct dccp_li_hist *hist,
+				       const gfp_t prio)
+{
+	return kmem_cache_alloc(hist->dccplih_slab, prio);
+}
+
+static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
+					     struct dccp_li_hist_entry *entry)
+{
+	if (entry != NULL)
+		kmem_cache_free(hist->dccplih_slab, entry);
+}
+
 void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
 {
 	struct dccp_li_hist_entry *entry, *next;
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 653328d..8d3c9bf 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -33,20 +33,6 @@ struct dccp_li_hist_entry {
 	u32		 dccplih_interval;
 };
 
-static inline struct dccp_li_hist_entry *
-		dccp_li_hist_entry_new(struct dccp_li_hist *hist,
-				       const gfp_t prio)
-{
-	return kmem_cache_alloc(hist->dccplih_slab, prio);
-}
-
-static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
-					     struct dccp_li_hist_entry *entry)
-{
-	if (entry != NULL)
-		kmem_cache_free(hist->dccplih_slab, entry);
-}
-
 extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
 			       struct list_head *list);
 
-- 
cgit v0.10.2


From cc4d6a3a34ce3976d7d01d044f3093cddc2921c2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 May 2007 18:53:08 -0300
Subject: loss_interval: Nuke dccp_li_hist

It had just a slab cache, so, for the sake of simplicity just make
dccp_tfrc_lib module init routine create the slab cache, no need for users of
the lib to create a private loss_interval object.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 9d2e2c1..407f10c 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -49,7 +49,6 @@ static int ccid3_debug;
 
 static struct dccp_tx_hist *ccid3_tx_hist;
 static struct dccp_rx_hist *ccid3_rx_hist;
-static struct dccp_li_hist *ccid3_li_hist;
 
 /*
  *	Transmitter Half-Connection Routines
@@ -844,7 +843,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
 	   > TFRC_RECV_NUM_LATE_LOSS) {
 		loss = 1;
-		dccp_li_update_li(sk, ccid3_li_hist,
+		dccp_li_update_li(sk,
 				  &hcrx->ccid3hcrx_li_hist,
 				  &hcrx->ccid3hcrx_hist,
 				  &hcrx->ccid3hcrx_tstamp_last_feedback,
@@ -1011,7 +1010,7 @@ static void ccid3_hc_rx_exit(struct sock *sk)
 	dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
 
 	/* Empty loss interval history */
-	dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
+	dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist);
 }
 
 static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
@@ -1095,19 +1094,12 @@ static __init int ccid3_module_init(void)
 	if (ccid3_tx_hist == NULL)
 		goto out_free_rx;
 
-	ccid3_li_hist = dccp_li_hist_new("ccid3");
-	if (ccid3_li_hist == NULL)
-		goto out_free_tx;
-
 	rc = ccid_register(&ccid3);
 	if (rc != 0)
-		goto out_free_loss_interval_history;
+		goto out_free_tx;
 out:
 	return rc;
 
-out_free_loss_interval_history:
-	dccp_li_hist_delete(ccid3_li_hist);
-	ccid3_li_hist = NULL;
 out_free_tx:
 	dccp_tx_hist_delete(ccid3_tx_hist);
 	ccid3_tx_hist = NULL;
@@ -1130,10 +1122,6 @@ static __exit void ccid3_module_exit(void)
 		dccp_rx_hist_delete(ccid3_rx_hist);
 		ccid3_rx_hist = NULL;
 	}
-	if (ccid3_li_hist != NULL) {
-		dccp_li_hist_delete(ccid3_li_hist);
-		ccid3_li_hist = NULL;
-	}
 }
 module_exit(ccid3_module_exit);
 
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 28eac9b..e6b1f0c 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -18,71 +18,26 @@
 #include "packet_history.h"
 #include "tfrc.h"
 
-struct dccp_li_hist *dccp_li_hist_new(const char *name)
-{
-	struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
-	static const char dccp_li_hist_mask[] = "li_hist_%s";
-	char *slab_name;
-
-	if (hist == NULL)
-		goto out;
-
-	slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1,
-			    GFP_ATOMIC);
-	if (slab_name == NULL)
-		goto out_free_hist;
-
-	sprintf(slab_name, dccp_li_hist_mask, name);
-	hist->dccplih_slab = kmem_cache_create(slab_name,
-					     sizeof(struct dccp_li_hist_entry),
-					       0, SLAB_HWCACHE_ALIGN,
-					       NULL, NULL);
-	if (hist->dccplih_slab == NULL)
-		goto out_free_slab_name;
-out:
-	return hist;
-out_free_slab_name:
-	kfree(slab_name);
-out_free_hist:
-	kfree(hist);
-	hist = NULL;
-	goto out;
-}
-
-EXPORT_SYMBOL_GPL(dccp_li_hist_new);
-
-void dccp_li_hist_delete(struct dccp_li_hist *hist)
-{
-	const char* name = kmem_cache_name(hist->dccplih_slab);
-
-	kmem_cache_destroy(hist->dccplih_slab);
-	kfree(name);
-	kfree(hist);
-}
-
-EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
+struct kmem_cache *dccp_li_cachep __read_mostly;
 
-static inline struct dccp_li_hist_entry *
-		dccp_li_hist_entry_new(struct dccp_li_hist *hist,
-				       const gfp_t prio)
+static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio)
 {
-	return kmem_cache_alloc(hist->dccplih_slab, prio);
+	return kmem_cache_alloc(dccp_li_cachep, prio);
 }
 
-static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
-					     struct dccp_li_hist_entry *entry)
+static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry)
 {
 	if (entry != NULL)
-		kmem_cache_free(hist->dccplih_slab, entry);
+		kmem_cache_free(dccp_li_cachep, entry);
 }
 
-void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
+void dccp_li_hist_purge(struct list_head *list)
 {
 	struct dccp_li_hist_entry *entry, *next;
 
 	list_for_each_entry_safe(entry, next, list, dccplih_node) {
 		list_del_init(&entry->dccplih_node);
-		kmem_cache_free(hist->dccplih_slab, entry);
+		kmem_cache_free(dccp_li_cachep, entry);
 	}
 }
 
@@ -134,17 +89,16 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
 
-static int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
-				     struct list_head *list,
+static int dccp_li_hist_interval_new(struct list_head *list,
 				     const u64 seq_loss, const u8 win_loss)
 {
 	struct dccp_li_hist_entry *entry;
 	int i;
 
 	for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
-		entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC);
+		entry = dccp_li_hist_entry_new(GFP_ATOMIC);
 		if (entry == NULL) {
-			dccp_li_hist_purge(hist, list);
+			dccp_li_hist_purge(list);
 			DCCP_BUG("loss interval list entry is NULL");
 			return 0;
 		}
@@ -260,7 +214,7 @@ found:
 		return 1000000 / p;
 }
 
-void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist,
+void dccp_li_update_li(struct sock *sk,
 		       struct list_head *li_hist_list,
 		       struct list_head *hist_list,
 		       struct timeval *last_feedback, u16 s, u32 bytes_recv,
@@ -270,8 +224,8 @@ void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist,
 	u64 seq_temp;
 
 	if (list_empty(li_hist_list)) {
-		if (!dccp_li_hist_interval_new(li_hist, li_hist_list,
-					       seq_loss, win_loss))
+		if (!dccp_li_hist_interval_new(li_hist_list, seq_loss,
+					       win_loss))
 			return;
 
 		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
@@ -293,7 +247,7 @@ void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist,
 		/* new loss event detected */
 		/* calculate last interval length */
 		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
-		entry = dccp_li_hist_entry_new(li_hist, GFP_ATOMIC);
+		entry = dccp_li_hist_entry_new(GFP_ATOMIC);
 
 		if (entry == NULL) {
 			DCCP_BUG("out of memory - can not allocate entry");
@@ -304,7 +258,7 @@ void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist,
 
 		tail = li_hist_list->prev;
 		list_del(tail);
-		kmem_cache_free(li_hist->dccplih_slab, tail);
+		kmem_cache_free(dccp_li_cachep, tail);
 
 		/* Create the newest interval */
 		entry->dccplih_seqno = seq_loss;
@@ -314,3 +268,19 @@ void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist,
 }
 
 EXPORT_SYMBOL_GPL(dccp_li_update_li);
+
+static __init int dccp_li_init(void)
+{
+	dccp_li_cachep = kmem_cache_create("dccp_li_hist",
+					   sizeof(struct dccp_li_hist_entry),
+					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	return dccp_li_cachep == NULL ? -ENOBUFS : 0;
+}
+
+static __exit void dccp_li_exit(void)
+{
+	kmem_cache_destroy(dccp_li_cachep);
+}
+
+module_init(dccp_li_init);
+module_exit(dccp_li_exit);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 8d3c9bf..f35c111 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -19,13 +19,6 @@
 
 #define DCCP_LI_HIST_IVAL_F_LENGTH  8
 
-struct dccp_li_hist {
-	struct kmem_cache *dccplih_slab;
-};
-
-extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
-extern void dccp_li_hist_delete(struct dccp_li_hist *hist);
-
 struct dccp_li_hist_entry {
 	struct list_head dccplih_node;
 	u64		 dccplih_seqno:48,
@@ -33,12 +26,11 @@ struct dccp_li_hist_entry {
 	u32		 dccplih_interval;
 };
 
-extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
-			       struct list_head *list);
+extern void dccp_li_hist_purge(struct list_head *list);
 
 extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
 
-extern void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist,
+extern void dccp_li_update_li(struct sock *sk,
 			      struct list_head *li_hist_list,
 			      struct list_head *hist_list,
 			      struct timeval *last_feedback, u16 s,
-- 
cgit v0.10.2


From dd36a9aba44e4ddbac011de2cb14a70444487303 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 May 2007 18:56:44 -0300
Subject: loss_interval: make struct dccp_li_hist_entry private

net/dccp/ccids/lib/loss_interval.c is the only place where this struct is used.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index e6b1f0c..01c1edb 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -18,6 +18,15 @@
 #include "packet_history.h"
 #include "tfrc.h"
 
+#define DCCP_LI_HIST_IVAL_F_LENGTH  8
+
+struct dccp_li_hist_entry {
+	struct list_head dccplih_node;
+	u64		 dccplih_seqno:48,
+			 dccplih_win_count:4;
+	u32		 dccplih_interval;
+};
+
 struct kmem_cache *dccp_li_cachep __read_mostly;
 
 static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio)
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index f35c111..906c806 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -14,18 +14,8 @@
  */
 
 #include <linux/list.h>
-#include <linux/slab.h>
 #include <linux/time.h>
 
-#define DCCP_LI_HIST_IVAL_F_LENGTH  8
-
-struct dccp_li_hist_entry {
-	struct list_head dccplih_node;
-	u64		 dccplih_seqno:48,
-			 dccplih_win_count:4;
-	u32		 dccplih_interval;
-};
-
 extern void dccp_li_hist_purge(struct list_head *list);
 
 extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
-- 
cgit v0.10.2


From f1c91da44728fba24927e44056a56e507c11cf7b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 16 Jun 2007 12:38:51 -0300
Subject: [KTIME]: Introduce ktime_us_delta

This provides a reusable time difference function which returns the difference in
microseconds, as often used in the DCCP code.

Committer note: renamed ktime_delta to ktime_us_delta and put it in ktime.h.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 2b139f6..9236659 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -279,6 +279,11 @@ static inline s64 ktime_to_us(const ktime_t kt)
 	return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
 }
 
+static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
+{
+       return ktime_to_us(ktime_sub(later, earlier));
+}
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
-- 
cgit v0.10.2


From 1e180f726a58089d15637b5495fecbad8c50c833 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 16 Jun 2007 12:39:38 -0300
Subject: [KTIME]: Introduce ktime_add_us

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 9236659..dae7143 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -284,6 +284,11 @@ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
        return ktime_to_us(ktime_sub(later, earlier));
 }
 
+static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec)
+{
+	return ktime_add_ns(kt, usec * 1000);
+}
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
-- 
cgit v0.10.2


From 8132da4d412ad51c34bad11133a8f0941e2a1972 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 16 Jun 2007 13:34:02 -0300
Subject: [CCID3]: Sending time: update to ktime_t

This updates the computation of t_nom and t_last_win_count to use the newer
gettimeofday interface.

Committer note: used ktime_to_timeval to set the 'now' variable to t_ld in
                ccid3hctx_no_feedback_timer

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 407f10c..94b3a1a 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -193,25 +193,20 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
  *	The algorithm is not applicable if RTT < 4 microseconds.
  */
 static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
-						struct timeval *now)
+						ktime_t now)
 {
-	suseconds_t delta;
 	u32 quarter_rtts;
 
 	if (unlikely(hctx->ccid3hctx_rtt < 4))	/* avoid divide-by-zero */
 		return;
 
-	delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count);
-	DCCP_BUG_ON(delta < 0);
-
-	quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4);
+	quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
+	quarter_rtts /= hctx->ccid3hctx_rtt / 4;
 
 	if (quarter_rtts > 0) {
-		hctx->ccid3hctx_t_last_win_count = *now;
+		hctx->ccid3hctx_t_last_win_count = now;
 		hctx->ccid3hctx_last_win_count	+= min_t(u32, quarter_rtts, 5);
 		hctx->ccid3hctx_last_win_count	&= 0xF;		/* mod 16 */
-
-		ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count);
 	}
 }
 
@@ -311,8 +306,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	struct timeval now;
-	suseconds_t delay;
+	ktime_t now = ktime_get_real();
+	s64 delay;
 
 	BUG_ON(hctx == NULL);
 
@@ -324,8 +319,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(skb->len == 0))
 		return -EBADMSG;
 
-	dccp_timestamp(sk, &now);
-
 	switch (hctx->ccid3hctx_state) {
 	case TFRC_SSTATE_NO_SENT:
 		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
@@ -348,7 +341,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 			ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
 			hctx->ccid3hctx_rtt  = dp->dccps_syn_rtt;
 			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
-			hctx->ccid3hctx_t_ld = now;
+			hctx->ccid3hctx_t_ld = ktime_to_timeval(now);
 		} else {
 			/* Sender does not have RTT sample: X = MSS/second */
 			hctx->ccid3hctx_x = dp->dccps_mss_cache;
@@ -360,7 +353,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		break;
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
-		delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
+		delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now);
 		ccid3_pr_debug("delay=%ld\n", (long)delay);
 		/*
 		 *	Scheduling of packet transmissions [RFC 3448, 4.6]
@@ -370,10 +363,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		 * else
 		 *       // send the packet in (t_nom - t_now) milliseconds.
 		 */
-		if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0)
-			return delay / 1000L;
+		if (delay - (s64)hctx->ccid3hctx_delta >= 0)
+			return (u32)delay / 1000L;
 
-		ccid3_hc_tx_update_win_count(hctx, &now);
+		ccid3_hc_tx_update_win_count(hctx, now);
 		break;
 	case TFRC_SSTATE_TERM:
 		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
@@ -386,8 +379,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	hctx->ccid3hctx_idle = 0;
 
 	/* set the nominal send time for the next following packet */
-	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
-
+	hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom,
+					     hctx->ccid3hctx_t_ipi);
 	return 0;
 }
 
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 8d31b38..51d4b80 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -36,6 +36,7 @@
 #ifndef _DCCP_CCID3_H_
 #define _DCCP_CCID3_H_
 
+#include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/time.h>
 #include <linux/types.h>
@@ -108,10 +109,10 @@ struct ccid3_hc_tx_sock {
 	enum ccid3_hc_tx_states		ccid3hctx_state:8;
 	u8				ccid3hctx_last_win_count;
 	u8				ccid3hctx_idle;
-	struct timeval			ccid3hctx_t_last_win_count;
+	ktime_t				ccid3hctx_t_last_win_count;
 	struct timer_list		ccid3hctx_no_feedback_timer;
 	struct timeval			ccid3hctx_t_ld;
-	struct timeval			ccid3hctx_t_nom;
+	ktime_t				ccid3hctx_t_nom;
 	u32				ccid3hctx_delta;
 	struct list_head		ccid3hctx_hist;
 	struct ccid3_options_received	ccid3hctx_options_received;
-- 
cgit v0.10.2


From 49d66a70cf9fd94057aacd6055334299ab3a5eac Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 16 Jun 2007 13:48:50 -0300
Subject: [CCID3]: Fix a bug in the send time processing

ccid3_hc_tx_send_packet currently returns 0 when the time difference between
current time and t_nom is less than 1000 microseconds.

In this case the packet is sent immediately; but, unlike other packets that can
be emitted on first attempt, it will not have its window counter updated and
its options set as required. This is a bug.

Fix: Require the time difference to be at least 1000 microseconds. The
algorithm then converges: time differences > 1000 microseconds trigger the
timer in dccp_write_xmit; after timer expiry this function is tried again; when
the time difference is less than 1000, the packet will have its options added
and window counter updated as required.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 94b3a1a..e91c2b9 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -363,7 +363,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		 * else
 		 *       // send the packet in (t_nom - t_now) milliseconds.
 		 */
-		if (delay - (s64)hctx->ccid3hctx_delta >= 0)
+		if (delay - (s64)hctx->ccid3hctx_delta >= 1000)
 			return (u32)delay / 1000L;
 
 		ccid3_hc_tx_update_win_count(hctx, now);
-- 
cgit v0.10.2


From 6c1361a6f285bf3df4b502651c0dd38d0eedc044 Mon Sep 17 00:00:00 2001
From: Krishna Kumar <krkumar2@in.ibm.com>
Date: Sun, 24 Jun 2007 19:56:09 -0700
Subject: [NET]: qdisc_restart - readability changes plus one bug fix.

New changes :

- Incorporated Peter Waskiewicz's comments.
- Re-added back one warning message (on driver returning wrong value).

Previous changes :

- Converted to use switch/case code which looks neater.

- "if (ret == NETDEV_TX_LOCKED && lockless)" is buggy, and the lockless
  check should be removed, since driver will return NETDEV_TX_LOCKED only
  if lockless is true and driver has to do the locking. In the original
  code as well as the latest code, this code can result in a bug where
  if LLTX is not set for a driver (lockless == 0) but the driver is written
  wrongly to do a trylock (despite LLTX not being set), the driver returns
  LOCKED. But since lockless is zero, the packet is requeue'd instead of
  calling collision code which will issue warning and free up the skb.
  Instead this skb will be retried with this driver next time, and the same
  result will ensue. Removing this check will catch these driver bugs instead
  of hiding the problem. I am keeping this change to readability section
  since :
  	a. it is confusing to check two things as it is; and
  	b. it is difficult to keep this check in the changed 'switch' code.

- Changed some names, like try_get_tx_pkt to dev_dequeue_skb (as that is
  the work being done and easier to understand) and do_dev_requeue to
  dev_requeue_skb, merged handle_dev_cpu_collision and tx_islocked to
  dev_handle_collision (handle_dev_cpu_collision is a small routine with only
  one caller, so there is no need to have two separate routines which also
  results in getting rid of two macros, etc.)

- Removed an XXX comment as it should never fail (I suspect this was related
  to batch skb WIP, Jamal ?). Converted some functions to original coding
  style of having the return values and the function name on same line, eg
  prio2list.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9461e8a..983c32c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -34,9 +34,6 @@
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
-#define SCHED_TX_DROP -2
-#define SCHED_TX_QUEUE -3
-
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
@@ -68,41 +65,24 @@ static inline int qdisc_qlen(struct Qdisc *q)
 	return q->q.qlen;
 }
 
-static inline int handle_dev_cpu_collision(struct net_device *dev)
-{
-	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
-		if (net_ratelimit())
-			printk(KERN_WARNING
-			       "Dead loop on netdevice %s, fix it urgently!\n",
-			       dev->name);
-		return SCHED_TX_DROP;
-	}
-	__get_cpu_var(netdev_rx_stat).cpu_collision++;
-	return SCHED_TX_QUEUE;
-}
-
-static inline int
-do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
+				  struct Qdisc *q)
 {
-
 	if (unlikely(skb->next))
 		dev->gso_skb = skb;
 	else
 		q->ops->requeue(skb, q);
-	/* XXX: Could netif_schedule fail? Or is the fact we are
-	 * requeueing imply the hardware path is closed
-	 * and even if we fail, some interupt will wake us
-	 */
+
 	netif_schedule(dev);
 	return 0;
 }
 
-static inline struct sk_buff *
-try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
+static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
+					      struct Qdisc *q)
 {
-	struct sk_buff *skb = dev->gso_skb;
+	struct sk_buff *skb;
 
-	if (skb)
+	if ((skb = dev->gso_skb))
 		dev->gso_skb = NULL;
 	else
 		skb = q->dequeue(q);
@@ -110,92 +90,117 @@ try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
 	return skb;
 }
 
-static inline int
-tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+static inline int handle_dev_cpu_collision(struct sk_buff *skb,
+					   struct net_device *dev,
+					   struct Qdisc *q)
 {
-	int ret = handle_dev_cpu_collision(dev);
+	int ret;
 
-	if (ret == SCHED_TX_DROP) {
+	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
+		/*
+		 * Same CPU holding the lock. It may be a transient
+		 * configuration error, when hard_start_xmit() recurses. We
+		 * detect it by checking xmit owner and drop the packet when
+		 * deadloop is detected. Return OK to try the next skb.
+		 */
 		kfree_skb(skb);
-		return qdisc_qlen(q);
+		if (net_ratelimit())
+			printk(KERN_WARNING "Dead loop on netdevice %s, "
+			       "fix it urgently!\n", dev->name);
+		ret = qdisc_qlen(q);
+	} else {
+		/*
+		 * Another cpu is holding lock, requeue & delay xmits for
+		 * some time.
+		 */
+		__get_cpu_var(netdev_rx_stat).cpu_collision++;
+		ret = dev_requeue_skb(skb, dev, q);
 	}
 
-	return do_dev_requeue(skb, dev, q);
+	return ret;
 }
 
-
 /*
-   NOTE: Called under dev->queue_lock with locally disabled BH.
-
-   __LINK_STATE_QDISC_RUNNING guarantees only one CPU
-   can enter this region at a time.
-
-   dev->queue_lock serializes queue accesses for this device
-   AND dev->qdisc pointer itself.
-
-   netif_tx_lock serializes accesses to device driver.
-
-   dev->queue_lock and netif_tx_lock are mutually exclusive,
-   if one is grabbed, another must be free.
-
-   Multiple CPUs may contend for the two locks.
-
-   Note, that this procedure can be called by a watchdog timer
-
-   Returns to the caller:
-   Returns:  0  - queue is empty or throttled.
-	    >0  - queue is not empty.
-
-*/
-
+ * NOTE: Called under dev->queue_lock with locally disabled BH.
+ *
+ * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
+ * device at a time. dev->queue_lock serializes queue accesses for
+ * this device AND dev->qdisc pointer itself.
+ *
+ *  netif_tx_lock serializes accesses to device driver.
+ *
+ *  dev->queue_lock and netif_tx_lock are mutually exclusive,
+ *  if one is grabbed, another must be free.
+ *
+ * Note, that this procedure can be called by a watchdog timer
+ *
+ * Returns to the caller:
+ *				0  - queue is empty or throttled.
+ *				>0 - queue is not empty.
+ *
+ */
 static inline int qdisc_restart(struct net_device *dev)
 {
 	struct Qdisc *q = dev->qdisc;
-	unsigned lockless = (dev->features & NETIF_F_LLTX);
 	struct sk_buff *skb;
+	unsigned lockless;
 	int ret;
 
-	skb = try_get_tx_pkt(dev, q);
-	if (skb == NULL)
+	/* Dequeue packet */
+	if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
 		return 0;
 
-	/* we have a packet to send */
-	if (!lockless) {
-		if (!netif_tx_trylock(dev))
-			return tx_islocked(skb, dev, q);
+	/*
+	 * When the driver has LLTX set, it does its own locking in
+	 * start_xmit. These checks are worth it because even uncongested
+	 * locks can be quite expensive. The driver can do a trylock, as
+	 * is being done here; in case of lock contention it should return
+	 * NETDEV_TX_LOCKED and the packet will be requeued.
+	 */
+	lockless = (dev->features & NETIF_F_LLTX);
+
+	if (!lockless && !netif_tx_trylock(dev)) {
+		/* Another CPU grabbed the driver tx lock */
+		return handle_dev_cpu_collision(skb, dev, q);
 	}
-	/* all clear .. */
+
+	/* And release queue */
 	spin_unlock(&dev->queue_lock);
 
 	ret = NETDEV_TX_BUSY;
 	if (!netif_queue_stopped(dev))
-		/* churn baby churn .. */
 		ret = dev_hard_start_xmit(skb, dev);
 
 	if (!lockless)
 		netif_tx_unlock(dev);
 
 	spin_lock(&dev->queue_lock);
-
-	/* we need to refresh q because it may be invalid since
-	 * we dropped  dev->queue_lock earlier ...
-	 * So dont try to be clever grasshopper
-	 */
 	q = dev->qdisc;
-	/* most likely result, packet went ok */
-	if (ret == NETDEV_TX_OK)
-		return qdisc_qlen(q);
-	/* only for lockless drivers .. */
-	if (ret == NETDEV_TX_LOCKED && lockless)
-		return tx_islocked(skb, dev, q);
 
-	if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
-		printk(KERN_WARNING " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
+	switch (ret) {
+	case NETDEV_TX_OK:
+		/* Driver sent out skb successfully */
+		ret = qdisc_qlen(q);
+		break;
+
+	case NETDEV_TX_LOCKED:
+		/* Driver try lock failed */
+		ret = handle_dev_cpu_collision(skb, dev, q);
+		break;
+
+	default:
+		/* Driver returned NETDEV_TX_BUSY - requeue skb */
+		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
+			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
+			       dev->name, ret, q->q.qlen);
+
+		ret = dev_requeue_skb(skb, dev, q);
+		break;
+	}
 
-	return do_dev_requeue(skb, dev, q);
+	return ret;
 }
 
-
 void __qdisc_run(struct net_device *dev)
 {
 	do {
-- 
cgit v0.10.2


From e50c41b53d7aa48152dd9c633b04fc7abd536f1f Mon Sep 17 00:00:00 2001
From: Krishna Kumar <krkumar2@in.ibm.com>
Date: Sun, 24 Jun 2007 19:57:27 -0700
Subject: [NET]: qdisc_restart - couple of optimizations.

Changes :

- netif_queue_stopped need not be called inside qdisc_restart as
  it has been called already in qdisc_run() before the first skb
  is sent, and in __qdisc_run() after each intermediate skb is
  sent (note : we are the only sender, so the queue cannot get
  stopped while the tx lock was got in the ~LLTX case).

- BUG_ON((int) q->q.qlen < 0) was a relic from old times when -1
  meant more packets are available, and __qdisc_run used to loop
  when qdisc_restart() returned -1. During those days, it was
  necessary to make sure that qlen is never less than zero, since
  __qdisc_run would get into an infinite loop if no packets are on
  the queue and this bug in qdisc was there (and worse - no more
  skbs could ever get queue'd as we hold the queue lock too). With
  Herbert's recent change to return values, this check is not
  required.  Hopefully Herbert can validate this change. If at all
  this is required, it should be added to skb_dequeue (in failure
  case), and not to qdisc_qlen.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 983c32c..2488dbb 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -61,7 +61,6 @@ void qdisc_unlock_tree(struct net_device *dev)
 
 static inline int qdisc_qlen(struct Qdisc *q)
 {
-	BUG_ON((int) q->q.qlen < 0);
 	return q->q.qlen;
 }
 
@@ -167,9 +166,7 @@ static inline int qdisc_restart(struct net_device *dev)
 	/* And release queue */
 	spin_unlock(&dev->queue_lock);
 
-	ret = NETDEV_TX_BUSY;
-	if (!netif_queue_stopped(dev))
-		ret = dev_hard_start_xmit(skb, dev);
+	ret = dev_hard_start_xmit(skb, dev);
 
 	if (!lockless)
 		netif_tx_unlock(dev);
-- 
cgit v0.10.2


From 334a8132d9950f769f390f0f35c233d099688e7a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 25 Jun 2007 04:35:20 -0700
Subject: [SKBUFF]: Keep track of writable header len of headerless clones

Currently NAT (and others) that want to modify cloned skbs copy them,
even if in the vast majority of cases it's not necessary because the
skb is a clone made by TCP and the portion NAT wants to modify is
actually writable because TCP releases the header reference before
cloning.

The problem is that there is no clean way for NAT to find out how
long the writable header area is, so this patch introduces skb->hdr_len
to hold this length. When a headerless skb is cloned skb->hdr_len
is set to the current headroom, for regular clones it is copied from
the original. A new function skb_clone_writable(skb, len) returns
whether the skb is writable up to len bytes from skb->data. To avoid
enlarging the skb the mac_len field is reduced to 16 bit and the
new hdr_len field is put in the remaining 16 bit.

I've done a few rough benchmarks of NAT (not with this exact patch,
but a very similar one). As expected it saves huge amounts of system
time in case of sendfile, bringing it down to basically the same
amount as without NAT, with sendmsg it only helps on loopback,
probably because of the large MTU.

Transmit a 1GB file using sendfile/sendmsg over eth0/lo with and
without NAT:

- sendfile eth0, no NAT:	sys     0m0.388s
- sendfile eth0, NAT:		sys     0m1.835s
- sendfile eth0, NAT + patch:	sys     0m0.370s	(~ -80%)

- sendfile lo, no NAT:		sys     0m0.258s
- sendfile lo, NAT:		sys     0m2.609s
- sendfile lo, NAT + patch:	sys     0m0.260s	(~ -90%)

- sendmsg eth0, no NAT:		sys     0m2.508s
- sendmsg eth0, NAT:		sys     0m2.539s
- sendmsg eth0, NAT + patch:	sys     0m2.445s	(no change)

- sendmsg lo, no NAT:		sys	0m2.151s
- sendmsg lo, NAT:		sys     0m3.557s
- sendmsg lo, NAT + patch:	sys     0m2.159s	(~ -40%)

I expect other users can see a similar performance improvement,
packet mangling iptables targets, ipip and ip_gre come to mind ..

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f0b2f7..881fe80 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -147,8 +147,8 @@ struct skb_shared_info {
 
 /* We divide dataref into two halves.  The higher 16 bits hold references
  * to the payload part of skb->data.  The lower 16 bits hold references to
- * the entire skb->data.  It is up to the users of the skb to agree on
- * where the payload starts.
+ * the entire skb->data.  A clone of a headerless skb holds the length of
+ * the header in skb->hdr_len.
  *
  * All users must obey the rule that the skb->data reference count must be
  * greater than or equal to the payload reference count.
@@ -206,6 +206,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@len: Length of actual data
  *	@data_len: Data length
  *	@mac_len: Length of link layer header
+ *	@hdr_len: writable header length of cloned skb
  *	@csum: Checksum (must include start/offset pair)
  *	@csum_start: Offset from skb->head where checksumming should start
  *	@csum_offset: Offset from csum_start where checksum should be stored
@@ -260,8 +261,9 @@ struct sk_buff {
 	char			cb[48];
 
 	unsigned int		len,
-				data_len,
-				mac_len;
+				data_len;
+	__u16			mac_len,
+				hdr_len;
 	union {
 		__wsum		csum;
 		struct {
@@ -1322,6 +1324,20 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
 }
 
 /**
+ *	skb_clone_writable - is the header of a clone writable
+ *	@skb: buffer to check
+ *	@len: length up to which to write
+ *
+ *	Returns true if modifying the header part of the cloned buffer
+ *	does not require the data to be copied.
+ */
+static inline int skb_clone_writable(struct sk_buff *skb, int len)
+{
+	return !skb_header_cloned(skb) &&
+	       skb_headroom(skb) + len <= skb->hdr_len;
+}
+
+/**
  *	skb_cow - copy header of skb when it is required
  *	@skb: buffer to cow
  *	@headroom: needed headroom
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3943c3a..c989c3a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -415,6 +415,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	C(csum);
 	C(local_df);
 	n->cloned = 1;
+	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
 	n->nohdr = 0;
 	C(pkt_type);
 	C(ip_summed);
@@ -676,6 +677,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->network_header   += off;
 	skb->mac_header	      += off;
 	skb->cloned   = 0;
+	skb->hdr_len  = 0;
 	skb->nohdr    = 0;
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
 	return 0;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a84478e..3aaabec 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -203,7 +203,9 @@ int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
 		return 0;
 
 	/* Not exclusive use of packet?  Must copy. */
-	if (skb_shared(*pskb) || skb_cloned(*pskb))
+	if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len))
+		goto copy_skb;
+	if (skb_shared(*pskb))
 		goto copy_skb;
 
 	return pskb_may_pull(*pskb, writable_len);
-- 
cgit v0.10.2


From 1092cb219774a82b1f16781aec7b8d4ec727c981 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 25 Jun 2007 13:49:35 -0700
Subject: [NETLINK]: attr: add nested compat attribute type

Add a nested compat attribute type that can be used to convert
attributes that contain a structure to nested attributes in a
backwards compatible way.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 7b510a9..d7b824b 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -118,6 +118,9 @@
  * Nested Attributes Construction:
  *   nla_nest_start(skb, type)		start a nested attribute
  *   nla_nest_end(skb, nla)		finalize a nested attribute
+ *   nla_nest_compat_start(skb, type,	start a nested compat attribute
+ *			   len, data)
+ *   nla_nest_compat_end(skb, type)	finalize a nested compat attribute
  *   nla_nest_cancel(skb, nla)		cancel nested attribute construction
  *
  * Attribute Length Calculations:
@@ -152,6 +155,7 @@
  *   nla_find_nested()			find attribute in nested attributes
  *   nla_parse()			parse and validate stream of attrs
  *   nla_parse_nested()			parse nested attribuets
+ *   nla_parse_nested_compat()		parse nested compat attributes
  *   nla_for_each_attr()		loop over all attributes
  *   nla_for_each_nested()		loop over the nested attributes
  *=========================================================================
@@ -170,6 +174,7 @@ enum {
 	NLA_FLAG,
 	NLA_MSECS,
 	NLA_NESTED,
+	NLA_NESTED_COMPAT,
 	NLA_NUL_STRING,
 	NLA_BINARY,
 	__NLA_TYPE_MAX,
@@ -190,6 +195,7 @@ enum {
  *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
  *    NLA_FLAG             Unused
  *    NLA_BINARY           Maximum length of attribute payload
+ *    NLA_NESTED_COMPAT    Exact length of structure payload
  *    All other            Exact length of attribute payload
  *
  * Example:
@@ -733,6 +739,39 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
 {
 	return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy);
 }
+
+/**
+ * nla_parse_nested_compat - parse nested compat attributes
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @nla: attribute containing the nested attributes
+ * @data: pointer to point to contained structure
+ * @len: length of contained structure
+ * @policy: validation policy
+ *
+ * Parse a nested compat attribute. The compat attribute contains a structure
+ * and optionally a set of nested attributes. On success the data pointer
+ * points to the nested data and tb contains the parsed attributes
+ * (see nla_parse).
+ */
+static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype,
+					    struct nlattr *nla,
+					    const struct nla_policy *policy,
+					    int len)
+{
+	if (nla_len(nla) < len)
+		return -1;
+	if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr))
+		return nla_parse_nested(tb, maxtype,
+					nla_data(nla) + NLA_ALIGN(len),
+					policy);
+	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+	return 0;
+}
+
+#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \
+({	data = nla_len(nla) >= len ? nla_data(nla) : NULL; \
+	__nla_parse_nested_compat(tb, maxtype, nla, policy, len); })
 /**
  * nla_put_u8 - Add a u16 netlink attribute to a socket buffer
  * @skb: socket buffer to add attribute to
@@ -965,6 +1004,51 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
 }
 
 /**
+ * nla_nest_compat_start - Start a new level of nested compat attributes
+ * @skb: socket buffer to add attributes to
+ * @attrtype: attribute type of container
+ * @attrlen: length of structure
+ * @data: pointer to structure
+ *
+ * Start a nested compat attribute that contains both a structure and
+ * a set of nested attributes.
+ *
+ * Returns the container attribute
+ */
+static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb,
+						   int attrtype, int attrlen,
+						   const void *data)
+{
+	struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb);
+
+	if (nla_put(skb, attrtype, attrlen, data) < 0)
+		return NULL;
+	if (nla_nest_start(skb, attrtype) == NULL) {
+		nlmsg_trim(skb, start);
+		return NULL;
+	}
+	return start;
+}
+
+/**
+ * nla_nest_compat_end - Finalize nesting of compat attributes
+ * @skb: socket buffer the attributes are stored in
+ * @start: container attribute
+ *
+ * Corrects the container attribute header to include all
+ * appended attributes.
+ *
+ * Returns the total data length of the skb.
+ */
+static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start)
+{
+	struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len);
+
+	start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start;
+	return nla_nest_end(skb, nest);
+}
+
+/**
  * nla_nest_cancel - Cancel nesting of attributes
  * @skb: socket buffer the message is stored in
  * @start: container attribute
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index c591212..e4d7bed 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -72,6 +72,17 @@ static int validate_nla(struct nlattr *nla, int maxtype,
 			return -ERANGE;
 		break;
 
+	case NLA_NESTED_COMPAT:
+		if (attrlen < pt->len)
+			return -ERANGE;
+		if (attrlen < NLA_ALIGN(pt->len))
+			break;
+		if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN)
+			return -ERANGE;
+		nla = nla_data(nla) + NLA_ALIGN(pt->len);
+		if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla))
+			return -ERANGE;
+		break;
 	default:
 		if (pt->len)
 			minlen = pt->len;
-- 
cgit v0.10.2


From afdc3238ec948531205f5c5f77d2de7bae519c71 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 25 Jun 2007 14:30:16 -0700
Subject: [RTNETLINK]: Add nested compat attribute

Add a nested compat attribute type that can be used to convert
attributes that contain a structure to nested attributes in a
backwards compatible way.

The attribute looks like this:

struct {
        [ compat contents ]
        struct rtattr {
                .rta_len        = total size,
                .rta_type       = type,
        } rta;
        struct old_structure struct;

        [ nested top-level attribute ]
        struct rtattr {
                .rta_len        = nest size,
                .rta_type       = type,
        } nest_attr;

        [ optional 0 .. n nested attributes ]
        struct rtattr {
                .rta_len        = private attribute len,
                .rta_type       = private attribute type,
        } nested_attr;
        struct nested_data data;
};

Since both userspace and kernel deal correctly with attributes that are
larger than expected, old versions will just parse the compat part and
ignore the rest.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 6127858..6731e7f 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -570,6 +570,8 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str)
 }
 
 extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len);
+extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
+				      struct rtattr *rta, void **data, int len);
 
 #define rtattr_parse_nested(tb, max, rta) \
 	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
@@ -638,6 +640,18 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 ({	(start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
 	(skb)->len; })
 
+#define RTA_NEST_COMPAT(skb, type, attrlen, data) \
+({	struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \
+	RTA_PUT(skb, type, attrlen, data); \
+	RTA_NEST(skb, type); \
+	__start; })
+
+#define RTA_NEST_COMPAT_END(skb, start) \
+({	struct rtattr *__nest = (void *)(start) + NLMSG_ALIGN((start)->rta_len); \
+	(start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
+	RTA_NEST_END(skb, __nest); \
+	(skb)->len; })
+
 #define RTA_NEST_CANCEL(skb, start) \
 ({	if (start) \
 		skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 06c0c5a..c25d23b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -97,6 +97,21 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
 	return 0;
 }
 
+int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
+			       struct rtattr *rta, void **data, int len)
+{
+	if (RTA_PAYLOAD(rta) < len)
+		return -1;
+	*data = RTA_DATA(rta);
+
+	if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) {
+		rta = RTA_DATA(rta) + RTA_ALIGN(len);
+		return rtattr_parse_nested(tb, maxattr, rta);
+	}
+	memset(tb, 0, sizeof(struct rtattr *) * maxattr);
+	return 0;
+}
+
 static struct rtnl_link *rtnl_msg_handlers[NPROTO];
 
 static inline int rtm_msgindex(int msgtype)
@@ -1297,6 +1312,7 @@ void __init rtnetlink_init(void)
 EXPORT_SYMBOL(__rta_fill);
 EXPORT_SYMBOL(rtattr_strlcpy);
 EXPORT_SYMBOL(rtattr_parse);
+EXPORT_SYMBOL(rtattr_parse_nested_compat);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
-- 
cgit v0.10.2


From 2371baa4bdab3268b32009926f75e7a5d3a41506 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 26 Jun 2007 03:23:44 -0700
Subject: [RTNETLINK]: Fix rtnetlink compat attribute patch

Sent the wrong patch previously.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 6731e7f..c91476c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -570,12 +570,16 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str)
 }
 
 extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len);
-extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
-				      struct rtattr *rta, void **data, int len);
+extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
+				        struct rtattr *rta, int len);
 
 #define rtattr_parse_nested(tb, max, rta) \
 	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
 
+#define rtattr_parse_nested_compat(tb, max, rta, data, len) \
+({	data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \
+	__rtattr_parse_nested_compat(tb, max, rta, len); })
+
 extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
 extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
 extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c25d23b..54c17e4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -97,13 +97,11 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
 	return 0;
 }
 
-int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
-			       struct rtattr *rta, void **data, int len)
+int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
+			         struct rtattr *rta, int len)
 {
 	if (RTA_PAYLOAD(rta) < len)
 		return -1;
-	*data = RTA_DATA(rta);
-
 	if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) {
 		rta = RTA_DATA(rta) + RTA_ALIGN(len);
 		return rtattr_parse_nested(tb, maxattr, rta);
@@ -1312,7 +1310,7 @@ void __init rtnetlink_init(void)
 EXPORT_SYMBOL(__rta_fill);
 EXPORT_SYMBOL(rtattr_strlcpy);
 EXPORT_SYMBOL(rtattr_parse);
-EXPORT_SYMBOL(rtattr_parse_nested_compat);
+EXPORT_SYMBOL(__rtattr_parse_nested_compat);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
-- 
cgit v0.10.2


From 136ebf08b46f839e2dc9db34322b654e5d9b9936 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Tue, 26 Jun 2007 23:51:41 -0700
Subject: [IPV6] MIP6: Kill unnecessary ifdefs.

Kill unnecessary CONFIG_IPV6_MIP6.

o It is redundant for the RAW socket to keep MH out via the config, since
  it can handle any protocol.
o Clean-up at AH.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/flow.h b/include/net/flow.h
index f3cc1f8..af59fa5 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -67,20 +67,16 @@ struct flowi {
 
 		__be32		spi;
 
-#ifdef CONFIG_IPV6_MIP6
 		struct {
 			__u8	type;
 		} mht;
-#endif
 	} uli_u;
 #define fl_ip_sport	uli_u.ports.sport
 #define fl_ip_dport	uli_u.ports.dport
 #define fl_icmp_type	uli_u.icmpt.type
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
-#ifdef CONFIG_IPV6_MIP6
 #define fl_mh_type	uli_u.mht.type
-#endif
 	__u32           secid;	/* used by xfrm; see secid.txt */
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 311f25a..7720c11 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -509,11 +509,9 @@ __be16 xfrm_flowi_sport(struct flowi *fl)
 	case IPPROTO_ICMPV6:
 		port = htons(fl->fl_icmp_type);
 		break;
-#ifdef CONFIG_IPV6_MIP6
 	case IPPROTO_MH:
 		port = htons(fl->fl_mh_type);
 		break;
-#endif
 	default:
 		port = 0;	/*XXX*/
 	}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 128f94c..01fa302 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -132,6 +132,8 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
 bad:
 	return;
 }
+#else
+static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {}
 #endif
 
 /**
@@ -189,10 +191,8 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
 	while (exthdr.raw < end) {
 		switch (nexthdr) {
 		case NEXTHDR_DEST:
-#ifdef CONFIG_IPV6_MIP6
 			if (dir == XFRM_POLICY_OUT)
 				ipv6_rearrange_destopt(iph, exthdr.opth);
-#endif
 		case NEXTHDR_HOP:
 			if (!zero_out_mutable_opts(exthdr.opth)) {
 				LIMIT_NETDEBUG(
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a58459a..a22c9c9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -611,9 +611,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 	struct iovec *iov;
 	u8 __user *type = NULL;
 	u8 __user *code = NULL;
-#ifdef CONFIG_IPV6_MIP6
 	u8 len = 0;
-#endif
 	int probed = 0;
 	int i;
 
@@ -646,7 +644,6 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 				probed = 1;
 			}
 			break;
-#ifdef CONFIG_IPV6_MIP6
 		case IPPROTO_MH:
 			if (iov->iov_base && iov->iov_len < 1)
 				break;
@@ -660,7 +657,6 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 				len += iov->iov_len;
 
 			break;
-#endif
 		default:
 			probed = 1;
 			break;
-- 
cgit v0.10.2


From 59fbb3a61e02deaeaa4fb50792217921f3002d64 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Tue, 26 Jun 2007 23:56:32 -0700
Subject: [IPV6] MIP6: Loadable module support for MIPv6.

This patch makes MIPv6 a loadable module named "mip6".

Here is a modprobe.conf(5) example to load it automatically
when user application uses XFRM state for MIPv6:

alias xfrm-type-10-43 mip6
alias xfrm-type-10-60 mip6

Some MIPv6 features are not covered by this module; however,
other features such as IPsec or IPv6 should not be affected,
with or without the patch.
We may discuss XFRM, MH (RAW socket) and ancillary data/sockopt
separately for future work.

Loadable features:
* MH receiving check (to send ICMP error back)
* RO header parsing and building (i.e. RH2 and HAO in DSTOPTS)
* XFRM policy/state database handling for RO

These are NOT covered as loadable:
* Home Address flags and its rule on source address selection
* XFRM sub policy (depends on its own kernel option)
* XFRM functions to receive RO as IPv6 extension header
* MH sending/receiving through raw socket if user application
  opens it (since raw socket allows to do so)
* RH2 sending as ancillary data
* RH2 operation with setsockopt(2)

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 648bd1f..213b63be 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -247,7 +247,7 @@ struct inet6_skb_parm {
 	__u16			lastopt;
 	__u32			nhoff;
 	__u16			flags;
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	__u16			dsthao;
 #endif
 
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index f3531d0..33b593e 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -61,7 +61,7 @@ extern int			addrconf_set_dstaddr(void __user *arg);
 extern int			ipv6_chk_addr(struct in6_addr *addr,
 					      struct net_device *dev,
 					      int strict);
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 extern int			ipv6_chk_home_addr(struct in6_addr *addr);
 #endif
 extern struct inet6_ifaddr *	ipv6_get_ifaddr(struct in6_addr *addr,
diff --git a/include/net/mip6.h b/include/net/mip6.h
index 68263c6..6327261 100644
--- a/include/net/mip6.h
+++ b/include/net/mip6.h
@@ -54,8 +54,4 @@ struct ip6_mh {
 #define IP6_MH_TYPE_BERROR	7   /* Binding Error */
 #define IP6_MH_TYPE_MAX		IP6_MH_TYPE_BERROR
 
-extern int mip6_init(void);
-extern void mip6_fini(void);
-extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb);
-
 #endif
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index af89608..a581989 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -3,6 +3,8 @@
 
 #ifdef __KERNEL__
 
+#include <net/protocol.h>
+
 #define RAWV6_HTABLE_SIZE	MAX_INET_PROTOS
 extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
 extern rwlock_t raw_v6_lock;
@@ -23,6 +25,13 @@ extern void			rawv6_err(struct sock *sk,
 					  int type, int code, 
 					  int offset, __be32 info);
 
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
+					   struct sk_buff *skb));
+int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
+					     struct sk_buff *skb));
+#endif
+
 #endif
 
 #endif
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 8e5d54f..eb0b808 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -109,7 +109,7 @@ config INET6_IPCOMP
 	  If unsure, say Y.
 
 config IPV6_MIP6
-	bool "IPv6: Mobility (EXPERIMENTAL)"
+	tristate "IPv6: Mobility (EXPERIMENTAL)"
 	depends on IPV6 && EXPERIMENTAL
 	select XFRM
 	---help---
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index bb33309..87c23a7 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -14,7 +14,6 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
 ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
-ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
 ipv6-$(CONFIG_PROC_FS) += proc.o
 
 ipv6-objs += $(ipv6-y)
@@ -28,6 +27,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
 obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
 obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
 obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
+obj-$(CONFIG_IPV6_MIP6) += mip6.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
 
 obj-$(CONFIG_IPV6_SIT) += sit.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 79b79f3..11c0028 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1034,7 +1034,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 			}
 
 			/* Rule 4: Prefer home address */
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 			if (hiscore.rule < 4) {
 				if (ifa_result->flags & IFA_F_HOMEADDRESS)
 					hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
@@ -2835,7 +2835,7 @@ void if6_proc_exit(void)
 }
 #endif	/* CONFIG_PROC_FS */
 
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 /* Check if address is a home address configured on any interface. */
 int ipv6_chk_home_addr(struct in6_addr *addr)
 {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 6dd3772..eed0937 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -58,9 +58,6 @@
 #ifdef CONFIG_IPV6_TUNNEL
 #include <net/ip6_tunnel.h>
 #endif
-#ifdef CONFIG_IPV6_MIP6
-#include <net/mip6.h>
-#endif
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -853,9 +850,6 @@ static int __init inet6_init(void)
 	ipv6_frag_init();
 	ipv6_nodata_init();
 	ipv6_destopt_init();
-#ifdef CONFIG_IPV6_MIP6
-	mip6_init();
-#endif
 
 	/* Init v6 transport protocols. */
 	udpv6_init();
@@ -921,9 +915,7 @@ static void __exit inet6_exit(void)
 
 	/* Cleanup code parts. */
 	ipv6_packet_cleanup();
-#ifdef CONFIG_IPV6_MIP6
-	mip6_fini();
-#endif
+
 	addrconf_cleanup();
 	ip6_flowlabel_cleanup();
 	ip6_route_cleanup();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 01fa302..cc6884a 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -74,7 +74,7 @@ bad:
 	return 0;
 }
 
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 /**
  *	ipv6_rearrange_destopt - rearrange IPv6 destination options header
  *	@iph: IPv6 header
@@ -228,7 +228,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	u8 nexthdr;
 	char tmp_base[8];
 	struct {
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 		struct in6_addr saddr;
 #endif
 		struct in6_addr daddr;
@@ -255,7 +255,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 			err = -ENOMEM;
 			goto error;
 		}
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 		memcpy(tmp_ext, &top_iph->saddr, extlen);
 #else
 		memcpy(tmp_ext, &top_iph->daddr, extlen);
@@ -294,7 +294,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	memcpy(top_iph, tmp_base, sizeof(tmp_base));
 	if (tmp_ext) {
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 		memcpy(&top_iph->saddr, tmp_ext, extlen);
 #else
 		memcpy(&top_iph->daddr, tmp_ext, extlen);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index b1fe7ac..ba1386d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -658,7 +658,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
 			switch (rthdr->type) {
 			case IPV6_SRCRT_TYPE_0:
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 			case IPV6_SRCRT_TYPE_2:
 #endif
 				break;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 14be0b9..173a4bb 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -42,7 +42,7 @@
 #include <net/ndisc.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 #include <net/xfrm.h>
 #endif
 
@@ -90,6 +90,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
  bad:
 	return -1;
 }
+EXPORT_SYMBOL_GPL(ipv6_find_tlv);
 
 /*
  *	Parsing tlv encoded headers.
@@ -196,7 +197,7 @@ bad:
   Destination options header.
  *****************************/
 
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
@@ -270,7 +271,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 #endif
 
 static struct tlvtype_proc tlvprocdestopt_lst[] = {
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	{
 		.type	= IPV6_TLV_HAO,
 		.func	= ipv6_dest_hao,
@@ -283,7 +284,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	__u16 dstbuf;
 #endif
 	struct dst_entry *dst;
@@ -298,7 +299,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 	}
 
 	opt->lastopt = opt->dst1 = skb_network_header_len(skb);
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	dstbuf = opt->dst1;
 #endif
 
@@ -308,7 +309,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 		skb = *skbp;
 		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 		opt->nhoff = dstbuf;
 #else
 		opt->nhoff = opt->dst1;
@@ -427,7 +428,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 looped_back:
 	if (hdr->segments_left == 0) {
 		switch (hdr->type) {
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 		case IPV6_SRCRT_TYPE_2:
 			/* Silently discard type 2 header unless it was
 			 * processed by own
@@ -463,7 +464,7 @@ looped_back:
 			return -1;
 		}
 		break;
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	case IPV6_SRCRT_TYPE_2:
 		/* Silently discard invalid RTH type 2 */
 		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
@@ -520,7 +521,7 @@ looped_back:
 	addr += i - 1;
 
 	switch (hdr->type) {
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	case IPV6_SRCRT_TYPE_2:
 		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
 				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e9bcce9..4765a29 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -272,7 +272,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
 	return 0;
 }
 
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 static void mip6_addr_swap(struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4704b5f..31dafaf 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -543,7 +543,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 			found_rhdr = 1;
 			break;
 		case NEXTHDR_DEST:
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
 				break;
 #endif
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index aa3d07c..b636c38 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -417,7 +417,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 			struct ipv6_rt_hdr *rthdr = opt->srcrt;
 			switch (rthdr->type) {
 			case IPV6_SRCRT_TYPE_0:
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 			case IPV6_SRCRT_TYPE_2:
 #endif
 				break;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 13b7160..20c78ec 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -30,6 +30,7 @@
 #include <net/sock.h>
 #include <net/ipv6.h>
 #include <net/ip6_checksum.h>
+#include <net/rawv6.h>
 #include <net/xfrm.h>
 #include <net/mip6.h>
 
@@ -86,7 +87,7 @@ static int mip6_mh_len(int type)
 	return len;
 }
 
-int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
+static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 {
 	struct ip6_mh *mh;
 
@@ -471,7 +472,7 @@ static struct xfrm_type mip6_rthdr_type =
 	.remote_addr	= mip6_xfrm_addr,
 };
 
-int __init mip6_init(void)
+static int __init mip6_init(void)
 {
 	printk(KERN_INFO "Mobile IPv6\n");
 
@@ -483,18 +484,33 @@ int __init mip6_init(void)
 		printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
 		goto mip6_rthdr_xfrm_fail;
 	}
+	if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
+		printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__);
+		goto mip6_rawv6_mh_fail;
+	}
+
+
 	return 0;
 
+ mip6_rawv6_mh_fail:
+	xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
  mip6_rthdr_xfrm_fail:
 	xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
  mip6_destopt_xfrm_fail:
 	return -EAGAIN;
 }
 
-void __exit mip6_fini(void)
+static void __exit mip6_fini(void)
 {
+	if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
+		printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__);
 	if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
 		printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
 	if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
 		printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__);
 }
+
+module_init(mip6_init);
+module_exit(mip6_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a22c9c9..aac6aeb 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -49,7 +49,7 @@
 #include <net/udp.h>
 #include <net/inet_common.h>
 #include <net/tcp_states.h>
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 #include <net/mip6.h>
 #endif
 
@@ -137,6 +137,28 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
+
+int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
+					   struct sk_buff *skb))
+{
+	rcu_assign_pointer(mh_filter, filter);
+	return 0;
+}
+EXPORT_SYMBOL(rawv6_mh_filter_register);
+
+int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
+					     struct sk_buff *skb))
+{
+	rcu_assign_pointer(mh_filter, NULL);
+	synchronize_rcu();
+	return 0;
+}
+EXPORT_SYMBOL(rawv6_mh_filter_unregister);
+
+#endif
+
 /*
  *	demultiplex raw sockets.
  *	(should consider queueing the skb in the sock receive_queue
@@ -178,16 +200,22 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 		case IPPROTO_ICMPV6:
 			filtered = icmpv6_filter(sk, skb);
 			break;
-#ifdef CONFIG_IPV6_MIP6
+
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 		case IPPROTO_MH:
+		{
 			/* XXX: To validate MH only once for each packet,
 			 * this is placed here. It should be after checking
 			 * xfrm policy, however it doesn't. The checking xfrm
 			 * policy is placed in rawv6_rcv() because it is
 			 * required for each socket.
 			 */
-			filtered = mip6_mh_filter(sk, skb);
+			int (*filter)(struct sock *sock, struct sk_buff *skb);
+
+			filter = rcu_dereference(mh_filter);
+			filtered = filter ? filter(sk, skb) : 0;
 			break;
+		}
 #endif
 		default:
 			filtered = 0;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 1faa2ea..3ec0c47 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -18,7 +18,7 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 #include <net/mip6.h>
 #endif
 
@@ -318,7 +318,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 			fl->proto = nexthdr;
 			return;
 
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 		case IPPROTO_MH:
 			if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
 				struct ip6_mh *mh;
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index baa461b..cdadb48 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -65,7 +65,7 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
 		goto end;
 
 	/* Rule 2: select MIPv6 RO or inbound trigger */
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	for (i = 0; i < n; i++) {
 		if (src[i] &&
 		    (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION ||
@@ -130,7 +130,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
 		goto end;
 
 	/* Rule 2: select MIPv6 RO or inbound trigger */
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	for (i = 0; i < n; i++) {
 		if (src[i] &&
 		    (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION ||
-- 
cgit v0.10.2


From d3d6dd3adaaad71eae20902ed81808a66a40a5b9 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Tue, 26 Jun 2007 23:57:49 -0700
Subject: [XFRM]: Add module alias for transformation type.

Clean up the XFRM type modules and add a module alias for each
protocol they implement:
 ESP, AH, IPCOMP, IPIP and IPv6 for IPsec
 ROUTING and DSTOPTS for MIPv6

This is almost the same as the XFRM mode alias, but new XFRM_PROTO_XXX
defines are added for the preprocessor, since some of the protocol
numbers are defined as enum values.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Acked-by: Ingo Oeser <netdev@axxeo.de>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7720c11..ee3827f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -19,9 +19,19 @@
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 
+#define XFRM_PROTO_ESP		50
+#define XFRM_PROTO_AH		51
+#define XFRM_PROTO_COMP		108
+#define XFRM_PROTO_IPIP		4
+#define XFRM_PROTO_IPV6		41
+#define XFRM_PROTO_ROUTING	IPPROTO_ROUTING
+#define XFRM_PROTO_DSTOPTS	IPPROTO_DSTOPTS
+
 #define XFRM_ALIGN8(len)	(((len) + 7) & ~7)
 #define MODULE_ALIAS_XFRM_MODE(family, encap) \
 	MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap))
+#define MODULE_ALIAS_XFRM_TYPE(family, proto) \
+	MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto))
 
 extern struct sock *xfrm_nl;
 extern u32 sysctl_xfrm_aevent_etime;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 6da8ff5..7a23e59 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -339,3 +339,4 @@ static void __exit ah4_fini(void)
 module_init(ah4_init);
 module_exit(ah4_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 47c95e8..98767a4 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -481,3 +481,4 @@ static void __exit esp4_fini(void)
 module_init(esp4_init);
 module_exit(esp4_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index ab86137..e787044 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -485,3 +485,4 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
 
+MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP);
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 5685103..9275c79 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -109,3 +109,4 @@ static void __exit ipip_fini(void)
 module_init(ipip_init);
 module_exit(ipip_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index cc6884a..53f46ab 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -554,3 +554,4 @@ module_init(ah6_init);
 module_exit(ah6_fini);
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 7107bb7..2db31ce 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -421,3 +421,4 @@ module_init(esp6_init);
 module_exit(esp6_fini);
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 1ee50b5..473f165 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -500,4 +500,4 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
 MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
 
-
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 20c78ec..8a1399c 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -514,3 +514,5 @@ module_init(mip6_init);
 module_exit(mip6_fini);
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 5502cc9..6f87dd5 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -379,3 +379,4 @@ static void __exit xfrm6_tunnel_fini(void)
 module_init(xfrm6_tunnel_init);
 module_exit(xfrm6_tunnel_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);
-- 
cgit v0.10.2


From d212f87b068c9d72065ef579d85b5ee6b8b59381 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 27 Jun 2007 00:47:37 -0700
Subject: [NET]: IPV6 checksum offloading in network devices

The existing model for checksum offload does not correctly handle
devices that can only offload IPv4 and IPv6. The NETIF_F_HW_CSUM flag
implies that the device can checksum any arbitrary protocol.

This patch:
 * adds NETIF_F_IPV6_CSUM for those devices
 * fixes bnx2 and tg3 devices that need it
 * add NETIF_F_IPV6_CSUM to ipv6 output (incl GSO)
 * fixes assumptions about NETIF_F_ALL_CSUM in nat
 * adjusts bridge union of checksumming computation

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index ce3ed67..0f4f76f 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -6490,10 +6490,10 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	memcpy(dev->perm_addr, bp->mac_addr, 6);
 	bp->name = board_info[ent->driver_data].name;
 
+	dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
 	if (CHIP_NUM(bp) == CHIP_NUM_5709)
-		dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
-	else
-		dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
+		dev->features |= NETIF_F_IPV6_CSUM;
+
 #ifdef BCM_VLAN
 	dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
 #endif
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 2f31841..3a43426 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -11944,12 +11944,11 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	 * checksumming.
 	 */
 	if ((tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) == 0) {
+		dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
 		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
 		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787)
-			dev->features |= NETIF_F_HW_CSUM;
-		else
-			dev->features |= NETIF_F_IP_CSUM;
-		dev->features |= NETIF_F_SG;
+			dev->features |= NETIF_F_IPV6_CSUM;
+
 		tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS;
 	} else
 		tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e7913ee..7a8f22f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -314,9 +314,10 @@ struct net_device
 	/* Net device features */
 	unsigned long		features;
 #define NETIF_F_SG		1	/* Scatter/gather IO. */
-#define NETIF_F_IP_CSUM		2	/* Can checksum only TCP/UDP over IPv4. */
+#define NETIF_F_IP_CSUM		2	/* Can checksum TCP/UDP over IPv4. */
 #define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
 #define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
+#define NETIF_F_IPV6_CSUM	16	/* Can checksum TCP/UDP over IPV6 */
 #define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
 #define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
 #define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
@@ -338,8 +339,11 @@ struct net_device
 	/* List of features with software fallbacks. */
 #define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
 
+
 #define NETIF_F_GEN_CSUM	(NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-#define NETIF_F_ALL_CSUM	(NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
+#define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
+#define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
+#define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
 
 	struct net_device	*next_sched;
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 849deaf..7b4ce91 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -368,10 +368,18 @@ void br_features_recompute(struct net_bridge *br)
 	list_for_each_entry(p, &br->port_list, list) {
 		unsigned long feature = p->dev->features;
 
+		/* if device needs checksumming, downgrade to hw checksumming */
 		if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM))
 			checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
+
+		/* if device can't do all checksum, downgrade to ipv4/ipv6 */
 		if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM))
-			checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM;
+			checksum ^= NETIF_F_HW_CSUM
+				| NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+
+		if (checksum & NETIF_F_IPV6_CSUM && !(feature & NETIF_F_IPV6_CSUM))
+			checksum &= ~NETIF_F_IPV6_CSUM;
+
 		if (!(feature & NETIF_F_IP_CSUM))
 			checksum = 0;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index ee051bb..a0a46e7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1509,9 +1509,11 @@ int dev_queue_xmit(struct sk_buff *skb)
 		skb_set_transport_header(skb, skb->csum_start -
 					      skb_headroom(skb));
 
-		if (!(dev->features & NETIF_F_GEN_CSUM) &&
-		    (!(dev->features & NETIF_F_IP_CSUM) ||
-		     skb->protocol != htons(ETH_P_IP)))
+		if (!(dev->features & NETIF_F_GEN_CSUM) &&
+		    !((dev->features & NETIF_F_IP_CSUM) &&
+		      skb->protocol == htons(ETH_P_IP)) &&
+		    !((dev->features & NETIF_F_IPV6_CSUM) &&
+		      skb->protocol == htons(ETH_P_IPV6)))
 			if (skb_checksum_help(skb))
 				goto out_kfree_skb;
 	}
@@ -3107,6 +3109,22 @@ int register_netdevice(struct net_device *dev)
 		}
 	}
 
+	/* Fix illegal checksum combinations */
+	if ((dev->features & NETIF_F_HW_CSUM) &&
+	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
+		       dev->name);
+		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+	}
+
+	if ((dev->features & NETIF_F_NO_CSUM) &&
+	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
+		       dev->name);
+		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+	}
+
+
 	/* Fix illegal SG+CSUM combinations. */
 	if ((dev->features & NETIF_F_SG) &&
 	    !(dev->features & NETIF_F_ALL_CSUM)) {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 041fba3..06c08e5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1170,6 +1170,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	int ihl;
 	int id;
 
+	if (!(features & NETIF_F_V4_CSUM))
+		features &= ~NETIF_F_SG;
+
 	if (unlikely(skb_shinfo(skb)->gso_type &
 		     ~(SKB_GSO_TCPV4 |
 		       SKB_GSO_UDP |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 34ea454..a7dd343 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -837,7 +837,7 @@ int ip_append_data(struct sock *sk,
 	 */
 	if (transhdrlen &&
 	    length + fragheaderlen <= mtu &&
-	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
+	    rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
 	    !exthdrlen)
 		csummode = CHECKSUM_PARTIAL;
 
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 15b6e5c..b1aa598 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -178,7 +178,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 	datalen = (*pskb)->len - iph->ihl*4;
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+		    (*pskb)->dev->features & NETIF_F_V4_CSUM) {
 			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
 			(*pskb)->csum_start = skb_headroom(*pskb) +
 					      skb_network_offset(*pskb) +
@@ -265,7 +265,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+		    (*pskb)->dev->features & NETIF_F_V4_CSUM) {
 			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
 			(*pskb)->csum_start = skb_headroom(*pskb) +
 					      skb_network_offset(*pskb) +
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b636c38..1c35066 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -123,7 +123,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 	struct ipv6hdr *ipv6h;
 	struct inet6_protocol *ops;
 
-	if (!(features & NETIF_F_HW_CSUM))
+	if (!(features & NETIF_F_V6_CSUM))
 		features &= ~NETIF_F_SG;
 
 	if (unlikely(skb_shinfo(skb)->gso_type &
-- 
cgit v0.10.2


From 75ebe8f73610636be8bbd8d73db883512850e6be Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 27 Jun 2007 01:25:11 -0700
Subject: [NET]: dev_mcast: unexport dev_mc_upload

dev_mc_add/dev_mc_delete take care of uploading the list when
necessary and that's the only interface other code should use.
Also remove two incorrect calls in DECnet.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 5a54053..80bb2e3 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -292,4 +292,3 @@ void __init dev_mcast_init(void)
 
 EXPORT_SYMBOL(dev_mc_add);
 EXPORT_SYMBOL(dev_mc_delete);
-EXPORT_SYMBOL(dev_mc_upload);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index ab41c18..e31549e 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -461,7 +461,6 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 		if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) {
 			dn_dn2eth(mac_addr, ifa->ifa_local);
 			dev_mc_add(dev, mac_addr, ETH_ALEN, 0);
-			dev_mc_upload(dev);
 		}
 	}
 
@@ -1064,8 +1063,6 @@ static int dn_eth_up(struct net_device *dev)
 	else
 		dev_mc_add(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0);
 
-	dev_mc_upload(dev);
-
 	dn_db->use_long = 1;
 
 	return 0;
-- 
cgit v0.10.2


From bf742482d7a647c5c6f03f78eb35a862e159ecf5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 27 Jun 2007 01:26:19 -0700
Subject: [NET]: dev: introduce generic net_device address lists

Introduce struct dev_addr_list and list maintenance functions
based on dev_mc_list and the related functions. This will be
used by follow-up patches for both multicast and secondary
unicast addresses.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7a8f22f..aa389c7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -177,6 +177,14 @@ struct netif_rx_stats
 
 DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
 
+struct dev_addr_list
+{
+	struct dev_addr_list	*next;
+	u8			da_addr[MAX_ADDR_LEN];
+	u8			da_addrlen;
+	int			da_users;
+	int			da_gusers;
+};
 
 /*
  *	We tag multicasts with these structures.
@@ -1008,6 +1016,9 @@ extern void		dev_mc_upload(struct net_device *dev);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern void		dev_mc_discard(struct net_device *dev);
+extern int 		__dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all);
+extern int		__dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly);
+extern void		__dev_addr_discard(struct dev_addr_list **list);
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index a0a46e7..18759cc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2553,6 +2553,75 @@ void dev_set_allmulti(struct net_device *dev, int inc)
 		dev_mc_upload(dev);
 }
 
+int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen,
+		      int glbl)
+{
+	struct dev_addr_list *da;
+
+	for (; (da = *list) != NULL; list = &da->next) {
+		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
+		    alen == da->da_addrlen) {
+			if (glbl) {
+				int old_glbl = da->da_gusers;
+				da->da_gusers = 0;
+				if (old_glbl == 0)
+					break;
+			}
+			if (--da->da_users)
+				return 0;
+
+			*list = da->next;
+			kfree(da);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl)
+{
+	struct dev_addr_list *da;
+
+	for (da = *list; da != NULL; da = da->next) {
+		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
+		    da->da_addrlen == alen) {
+			if (glbl) {
+				int old_glbl = da->da_gusers;
+				da->da_gusers = 1;
+				if (old_glbl)
+					return 0;
+			}
+			da->da_users++;
+			return 0;
+		}
+	}
+
+	da = kmalloc(sizeof(*da), GFP_ATOMIC);
+	if (da == NULL)
+		return -ENOMEM;
+	memcpy(da->da_addr, addr, alen);
+	da->da_addrlen = alen;
+	da->da_users = 1;
+	da->da_gusers = glbl ? 1 : 0;
+	da->next = *list;
+	*list = da;
+	return 0;
+}
+
+void __dev_addr_discard(struct dev_addr_list **list)
+{
+	struct dev_addr_list *tmp;
+
+	while (*list != NULL) {
+		tmp = *list;
+		*list = tmp->next;
+		if (tmp->da_users > tmp->da_gusers)
+			printk("__dev_addr_discard: address leakage! "
+			       "da_users=%d\n", tmp->da_users);
+		kfree(tmp);
+	}
+}
+
 unsigned dev_get_flags(const struct net_device *dev)
 {
 	unsigned flags;
-- 
cgit v0.10.2


From 3fba5a8b1e3df2384b90493538161e83cf15dd5f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 27 Jun 2007 01:26:58 -0700
Subject: [NET]: dev_mcast: switch to generic net_device address lists

Use generic net_device address lists for multicast list handling.
Some defines are used to keep drivers working.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index aa389c7..9e114e7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -189,15 +189,12 @@ struct dev_addr_list
 /*
  *	We tag multicasts with these structures.
  */
- 
-struct dev_mc_list
-{	
-	struct dev_mc_list	*next;
-	__u8			dmi_addr[MAX_ADDR_LEN];
-	unsigned char		dmi_addrlen;
-	int			dmi_users;
-	int			dmi_gusers;
-};
+
+#define dev_mc_list	dev_addr_list
+#define dmi_addr	da_addr
+#define dmi_addrlen	da_addrlen
+#define dmi_users	da_users
+#define dmi_gusers	da_gusers
 
 struct hh_cache
 {
@@ -400,7 +397,7 @@ struct net_device
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
-	struct dev_mc_list	*mc_list;	/* Multicast mac addresses	*/
+	struct dev_addr_list	*mc_list;	/* Multicast mac addresses	*/
 	int			mc_count;	/* Number of installed mcasts	*/
 	int			promiscuity;
 	int			allmulti;
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 80bb2e3..7029074 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -102,47 +102,20 @@ void dev_mc_upload(struct net_device *dev)
 
 int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 {
-	int err = 0;
-	struct dev_mc_list *dmi, **dmip;
+	int err;
 
 	netif_tx_lock_bh(dev);
+	err = __dev_addr_delete(&dev->mc_list, addr, alen, glbl);
+	if (!err) {
+		dev->mc_count--;
 
-	for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
 		/*
-		 *	Find the entry we want to delete. The device could
-		 *	have variable length entries so check these too.
+		 *	We have altered the list, so the card
+		 *	loaded filter is now wrong. Fix it
 		 */
-		if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
-		    alen == dmi->dmi_addrlen) {
-			if (glbl) {
-				int old_glbl = dmi->dmi_gusers;
-				dmi->dmi_gusers = 0;
-				if (old_glbl == 0)
-					break;
-			}
-			if (--dmi->dmi_users)
-				goto done;
-
-			/*
-			 *	Last user. So delete the entry.
-			 */
-			*dmip = dmi->next;
-			dev->mc_count--;
-
-			kfree(dmi);
-
-			/*
-			 *	We have altered the list, so the card
-			 *	loaded filter is now wrong. Fix it
-			 */
-			__dev_mc_upload(dev);
-
-			netif_tx_unlock_bh(dev);
-			return 0;
-		}
+
+		__dev_mc_upload(dev);
 	}
-	err = -ENOENT;
-done:
 	netif_tx_unlock_bh(dev);
 	return err;
 }
@@ -153,46 +126,15 @@ done:
 
 int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 {
-	int err = 0;
-	struct dev_mc_list *dmi, *dmi1;
-
-	dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
+	int err;
 
 	netif_tx_lock_bh(dev);
-	for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
-		if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
-		    dmi->dmi_addrlen == alen) {
-			if (glbl) {
-				int old_glbl = dmi->dmi_gusers;
-				dmi->dmi_gusers = 1;
-				if (old_glbl)
-					goto done;
-			}
-			dmi->dmi_users++;
-			goto done;
-		}
-	}
-
-	if ((dmi = dmi1) == NULL) {
-		netif_tx_unlock_bh(dev);
-		return -ENOMEM;
+	err = __dev_addr_add(&dev->mc_list, addr, alen, glbl);
+	if (!err) {
+		dev->mc_count++;
+		__dev_mc_upload(dev);
 	}
-	memcpy(dmi->dmi_addr, addr, alen);
-	dmi->dmi_addrlen = alen;
-	dmi->next = dev->mc_list;
-	dmi->dmi_users = 1;
-	dmi->dmi_gusers = glbl ? 1 : 0;
-	dev->mc_list = dmi;
-	dev->mc_count++;
-
-	__dev_mc_upload(dev);
-
 	netif_tx_unlock_bh(dev);
-	return 0;
-
-done:
-	netif_tx_unlock_bh(dev);
-	kfree(dmi1);
 	return err;
 }
 
@@ -203,16 +145,8 @@ done:
 void dev_mc_discard(struct net_device *dev)
 {
 	netif_tx_lock_bh(dev);
-
-	while (dev->mc_list != NULL) {
-		struct dev_mc_list *tmp = dev->mc_list;
-		dev->mc_list = tmp->next;
-		if (tmp->dmi_users > tmp->dmi_gusers)
-			printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
-		kfree(tmp);
-	}
+	__dev_addr_discard(&dev->mc_list);
 	dev->mc_count = 0;
-
 	netif_tx_unlock_bh(dev);
 }
 
@@ -244,7 +178,7 @@ static void dev_mc_seq_stop(struct seq_file *seq, void *v)
 
 static int dev_mc_seq_show(struct seq_file *seq, void *v)
 {
-	struct dev_mc_list *m;
+	struct dev_addr_list *m;
 	struct net_device *dev = v;
 
 	netif_tx_lock_bh(dev);
-- 
cgit v0.10.2


From 4417da668c0021903464f92db278ddae348e0299 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 27 Jun 2007 01:28:10 -0700
Subject: [NET]: dev: secondary unicast address support

Add support for configuring secondary unicast addresses on network
devices. To support this, devices capable of filtering multiple
unicast addresses need to change their set_multicast_list function
to configure unicast filters as well and assign it to dev->set_rx_mode
instead of dev->set_multicast_list. Other devices are put into promiscuous
mode when secondary unicast addresses are present.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9e114e7..2c0cc19 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -397,6 +397,9 @@ struct net_device
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
+	struct dev_addr_list	*uc_list;	/* Secondary unicast mac addresses */
+	int			uc_count;	/* Number of installed ucasts	*/
+	int			uc_promisc;
 	struct dev_addr_list	*mc_list;	/* Multicast mac addresses	*/
 	int			mc_count;	/* Number of installed mcasts	*/
 	int			promiscuity;
@@ -502,6 +505,8 @@ struct net_device
 						void *saddr,
 						unsigned len);
 	int			(*rebuild_header)(struct sk_buff *skb);
+#define HAVE_SET_RX_MODE
+	void			(*set_rx_mode)(struct net_device *dev);
 #define HAVE_MULTICAST			 
 	void			(*set_multicast_list)(struct net_device *dev);
 #define HAVE_SET_MAC_ADDR  		 
@@ -1008,8 +1013,11 @@ extern struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 				       void (*setup)(struct net_device *));
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
-/* Functions used for multicast support */
-extern void		dev_mc_upload(struct net_device *dev);
+/* Functions used for secondary unicast and multicast support */
+extern void		dev_set_rx_mode(struct net_device *dev);
+extern void		__dev_set_rx_mode(struct net_device *dev);
+extern int		dev_unicast_delete(struct net_device *dev, void *addr, int alen);
+extern int		dev_unicast_add(struct net_device *dev, void *addr, int alen);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern void		dev_mc_discard(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 18759cc..36e9bf8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -942,7 +942,7 @@ int dev_open(struct net_device *dev)
 		/*
 		 *	Initialize multicasting status
 		 */
-		dev_mc_upload(dev);
+		dev_set_rx_mode(dev);
 
 		/*
 		 *	Wakeup transmit queue engine
@@ -2498,17 +2498,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 	return 0;
 }
 
-/**
- *	dev_set_promiscuity	- update promiscuity count on a device
- *	@dev: device
- *	@inc: modifier
- *
- *	Add or remove promiscuity from a device. While the count in the device
- *	remains above zero the interface remains promiscuous. Once it hits zero
- *	the device reverts back to normal filtering operation. A negative inc
- *	value is used to drop promiscuity on the device.
- */
-void dev_set_promiscuity(struct net_device *dev, int inc)
+static void __dev_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
 
@@ -2517,7 +2507,6 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
 	else
 		dev->flags |= IFF_PROMISC;
 	if (dev->flags != old_flags) {
-		dev_mc_upload(dev);
 		printk(KERN_INFO "device %s %s promiscuous mode\n",
 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
 							       "left");
@@ -2531,6 +2520,25 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
 }
 
 /**
+ *	dev_set_promiscuity	- update promiscuity count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove promiscuity from a device. While the count in the device
+ *	remains above zero the interface remains promiscuous. Once it hits zero
+ *	the device reverts back to normal filtering operation. A negative inc
+ *	value is used to drop promiscuity on the device.
+ */
+void dev_set_promiscuity(struct net_device *dev, int inc)
+{
+	unsigned short old_flags = dev->flags;
+
+	__dev_set_promiscuity(dev, inc);
+	if (dev->flags != old_flags)
+		dev_set_rx_mode(dev);
+}
+
+/**
  *	dev_set_allmulti	- update allmulti count on a device
  *	@dev: device
  *	@inc: modifier
@@ -2550,7 +2558,48 @@ void dev_set_allmulti(struct net_device *dev, int inc)
 	if ((dev->allmulti += inc) == 0)
 		dev->flags &= ~IFF_ALLMULTI;
 	if (dev->flags ^ old_flags)
-		dev_mc_upload(dev);
+		dev_set_rx_mode(dev);
+}
+
+/*
+ *	Upload unicast and multicast address lists to device and
+ *	configure RX filtering. When the device doesn't support unicast
+ *	filtering it is put in promiscuous mode while unicast addresses
+ *	are present.
+ */
+void __dev_set_rx_mode(struct net_device *dev)
+{
+	/* dev_open will call this function so the list will stay sane. */
+	if (!(dev->flags&IFF_UP))
+		return;
+
+	if (!netif_device_present(dev))
+	        return;
+
+	if (dev->set_rx_mode)
+		dev->set_rx_mode(dev);
+	else {
+		/* Unicast address changes may only happen under the rtnl,
+		 * therefore calling __dev_set_promiscuity here is safe.
+		 */
+		if (dev->uc_count > 0 && !dev->uc_promisc) {
+			__dev_set_promiscuity(dev, 1);
+			dev->uc_promisc = 1;
+		} else if (dev->uc_count == 0 && dev->uc_promisc) {
+			__dev_set_promiscuity(dev, -1);
+			dev->uc_promisc = 0;
+		}
+
+		if (dev->set_multicast_list)
+			dev->set_multicast_list(dev);
+	}
+}
+
+void dev_set_rx_mode(struct net_device *dev)
+{
+	netif_tx_lock_bh(dev);
+	__dev_set_rx_mode(dev);
+	netif_tx_unlock_bh(dev);
 }
 
 int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen,
@@ -2622,6 +2671,66 @@ void __dev_addr_discard(struct dev_addr_list **list)
 	}
 }
 
+/**
+ *	dev_unicast_delete	- Release secondary unicast address.
+ *	@dev: device
+ *
+ *	Release reference to a secondary unicast address and remove it
+ *	from the device if the reference count drops to zero.
+ *
+ * 	The caller must hold the rtnl_mutex.
+ */
+int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	netif_tx_lock_bh(dev);
+	err = __dev_addr_delete(&dev->uc_list, addr, alen, 0);
+	if (!err) {
+		dev->uc_count--;
+		__dev_set_rx_mode(dev);
+	}
+	netif_tx_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_unicast_delete);
+
+/**
+ *	dev_unicast_add		- add a secondary unicast address
+ *	@dev: device
+ *
+ *	Add a secondary unicast address to the device or increase
+ *	the reference count if it already exists.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_unicast_add(struct net_device *dev, void *addr, int alen)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	netif_tx_lock_bh(dev);
+	err = __dev_addr_add(&dev->uc_list, addr, alen, 0);
+	if (!err) {
+		dev->uc_count++;
+		__dev_set_rx_mode(dev);
+	}
+	netif_tx_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_unicast_add);
+
+static void dev_unicast_discard(struct net_device *dev)
+{
+	netif_tx_lock_bh(dev);
+	__dev_addr_discard(&dev->uc_list);
+	dev->uc_count = 0;
+	netif_tx_unlock_bh(dev);
+}
+
 unsigned dev_get_flags(const struct net_device *dev)
 {
 	unsigned flags;
@@ -2665,7 +2774,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 	 *	Load in the correct multicast list now the flags have changed.
 	 */
 
-	dev_mc_upload(dev);
+	dev_set_rx_mode(dev);
 
 	/*
 	 *	Have we downed the interface. We handle IFF_UP ourselves
@@ -2678,7 +2787,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
 
 		if (!ret)
-			dev_mc_upload(dev);
+			dev_set_rx_mode(dev);
 	}
 
 	if (dev->flags & IFF_UP &&
@@ -3558,8 +3667,9 @@ void unregister_netdevice(struct net_device *dev)
 	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
 
 	/*
-	 *	Flush the multicast chain
+	 *	Flush the unicast and multicast chains
 	 */
+	dev_unicast_discard(dev);
 	dev_mc_discard(dev);
 
 	if (dev->uninit)
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 7029074..5cc9b44 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -64,39 +64,6 @@
  */
 
 /*
- *	Update the multicast list into the physical NIC controller.
- */
-
-static void __dev_mc_upload(struct net_device *dev)
-{
-	/* Don't do anything till we up the interface
-	 * [dev_open will call this function so the list will
-	 * stay sane]
-	 */
-
-	if (!(dev->flags&IFF_UP))
-		return;
-
-	/*
-	 *	Devices with no set multicast or which have been
-	 *	detached don't get set.
-	 */
-
-	if (dev->set_multicast_list == NULL ||
-	    !netif_device_present(dev))
-		return;
-
-	dev->set_multicast_list(dev);
-}
-
-void dev_mc_upload(struct net_device *dev)
-{
-	netif_tx_lock_bh(dev);
-	__dev_mc_upload(dev);
-	netif_tx_unlock_bh(dev);
-}
-
-/*
  *	Delete a device level multicast
  */
 
@@ -114,7 +81,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 		 *	loaded filter is now wrong. Fix it
 		 */
 
-		__dev_mc_upload(dev);
+		__dev_set_rx_mode(dev);
 	}
 	netif_tx_unlock_bh(dev);
 	return err;
@@ -132,7 +99,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 	err = __dev_addr_add(&dev->mc_list, addr, alen, glbl);
 	if (!err) {
 		dev->mc_count++;
-		__dev_mc_upload(dev);
+		__dev_set_rx_mode(dev);
 	}
 	netif_tx_unlock_bh(dev);
 	return err;
-- 
cgit v0.10.2


From 342f0234c71b40da785dd6a7ce1dd481ecbfdb81 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Wed, 27 Jun 2007 15:37:46 -0700
Subject: [UDP]: Introduce UDP encapsulation type for L2TP

This patch adds a new UDP_ENCAP_L2TPINUDP encapsulation type for UDP
sockets. When a UDP socket's encap_type is UDP_ENCAP_L2TPINUDP, the
skb is delivered to a function pointed to by the udp_sock's
encap_rcv funcptr. If the skb isn't wanted by L2TP, encap_rcv returns >0,
which causes the skb to be passed through to UDP.

Include padding to put the new encap_rcv field on a 4-byte boundary.

Previously, the only user of UDP encap sockets was ESP, so when
CONFIG_XFRM was not defined, some of the encap code was compiled
out. This patch changes that. As a result, udp_encap_rcv() will
now do a little more work when CONFIG_XFRM is not defined.

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 6de445c..8ec703f 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -42,6 +42,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
 /* UDP encapsulation types */
 #define UDP_ENCAP_ESPINUDP_NON_IKE	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
 #define UDP_ENCAP_ESPINUDP	2 /* draft-ietf-ipsec-udp-encaps-06 */
+#define UDP_ENCAP_L2TPINUDP	3 /* rfc2661 */
 
 #ifdef __KERNEL__
 #include <linux/types.h>
@@ -70,6 +71,11 @@ struct udp_sock {
 #define UDPLITE_SEND_CC  0x2  		/* set via udplite setsockopt         */
 #define UDPLITE_RECV_CC  0x4		/* set via udplite setsocktopt        */
 	__u8		 pcflag;        /* marks socket as UDP-Lite if > 0    */
+	__u8		 unused[3];
+	/*
+	 * For encapsulation sockets.
+	 */
+	int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
 };
 
 static inline struct udp_sock *udp_sk(const struct sock *sk)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index facb7e2..b9276f8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -70,6 +70,7 @@
  *	Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
  *					a single port at the same time.
  *	Derek Atkins <derek@ihtfp.com>: Add Encapulation Support
+ *	James Chapman		:	Add L2TP encapsulation type.
  *
  *
  *		This program is free software; you can redistribute it and/or
@@ -923,12 +924,10 @@ int udp_disconnect(struct sock *sk, int flags)
  * 	1  if the UDP system should process it
  *	0  if we should drop this packet
  * 	-1 if it should get processed by xfrm4_rcv_encap
+ *	-2 if it should get processed by l2tp
  */
 static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 {
-#ifndef CONFIG_XFRM
-	return 1;
-#else
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
 	struct iphdr *iph;
@@ -983,8 +982,14 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 			/* Must be an IKE packet.. pass it through */
 			return 1;
 		break;
+	case UDP_ENCAP_L2TPINUDP:
+		/* Let caller know to send this to l2tp */
+		return -2;
 	}
 
+#ifndef CONFIG_XFRM
+	return 1;
+#else
 	/* At this point we are sure that this is an ESPinUDP packet,
 	 * so we need to remove 'len' bytes from the packet (the UDP
 	 * header and optional ESP marker bytes) and then modify the
@@ -1055,12 +1060,25 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 			kfree_skb(skb);
 			return 0;
 		}
-		if (ret < 0) {
+		if (ret == -1) {
 			/* process the ESP packet */
 			ret = xfrm4_rcv_encap(skb, up->encap_type);
 			UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
 			return -ret;
 		}
+		if (ret == -2) {
+			/* process the L2TP packet */
+			if (up->encap_rcv != NULL) {
+				ret = (*up->encap_rcv)(sk, skb);
+				if (ret <= 0) {
+					UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
+					return ret;
+				}
+
+				/* FALLTHROUGH -- pass up as UDP packet */
+			}
+		}
+
 		/* FALLTHROUGH -- it's a UDP Packet */
 	}
 
@@ -1349,6 +1367,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		case 0:
 		case UDP_ENCAP_ESPINUDP:
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
+		case UDP_ENCAP_L2TPINUDP:
 			up->encap_type = val;
 			break;
 		default:
-- 
cgit v0.10.2


From cf14a4d06742d59ecb2d837a3f53bb24d1ff9acb Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Wed, 27 Jun 2007 15:43:43 -0700
Subject: [L2TP]: Changes to existing ppp and socket kernel headers for L2TP

Add struct sockaddr_pppol2tp to carry L2TP-specific address
information for the PPPoX (PPPoL2TP) socket. Unfortunately we can't
use the union inside struct sockaddr_pppox because the L2TP-specific
data is larger than the current size of the union and we must preserve
the size of struct sockaddr_pppox for binary compatibility.

Also add a PPPIOCGL2TPSTATS ioctl to allow userspace to obtain
L2TP counters and state from the kernel.

Add new if_pppol2tp.h header.

[ Modified to use aligned_u64 in statistics structure -DaveM ]

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index d944516..127d2d1 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -225,6 +225,7 @@ unifdef-y += if_fddi.h
 unifdef-y += if_frad.h
 unifdef-y += if_ltalk.h
 unifdef-y += if_link.h
+unifdef-y += if_pppol2tp.h
 unifdef-y += if_pppox.h
 unifdef-y += if_shaper.h
 unifdef-y += if_tr.h
diff --git a/include/linux/if_ppp.h b/include/linux/if_ppp.h
index 768372f..0f2f70d 100644
--- a/include/linux/if_ppp.h
+++ b/include/linux/if_ppp.h
@@ -110,6 +110,21 @@ struct ifpppcstatsreq {
 	struct ppp_comp_stats stats;
 };
 
+/* For PPPIOCGL2TPSTATS */
+struct pppol2tp_ioc_stats {
+	__u16		tunnel_id;	/* redundant */
+	__u16		session_id;	/* if zero, get tunnel stats */
+	__u32		using_ipsec:1;	/* valid only for session_id == 0 */
+	aligned_u64	tx_packets;
+	aligned_u64	tx_bytes;
+	aligned_u64	tx_errors;
+	aligned_u64	rx_packets;
+	aligned_u64	rx_bytes;
+	aligned_u64	rx_seq_discards;
+	aligned_u64	rx_oos_packets;
+	aligned_u64	rx_errors;
+};
+
 #define ifr__name       b.ifr_ifrn.ifrn_name
 #define stats_ptr       b.ifr_ifru.ifru_data
 
@@ -146,6 +161,7 @@ struct ifpppcstatsreq {
 #define PPPIOCDISCONN	_IO('t', 57)		/* disconnect channel */
 #define PPPIOCATTCHAN	_IOW('t', 56, int)	/* attach to ppp channel */
 #define PPPIOCGCHAN	_IOR('t', 55, int)	/* get ppp channel number */
+#define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats)
 
 #define SIOCGPPPSTATS   (SIOCDEVPRIVATE + 0)
 #define SIOCGPPPVER     (SIOCDEVPRIVATE + 1)	/* NEVER change this!! */
diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
new file mode 100644
index 0000000..516203b
--- /dev/null
+++ b/include/linux/if_pppol2tp.h
@@ -0,0 +1,69 @@
+/***************************************************************************
+ * Linux PPP over L2TP (PPPoL2TP) Socket Implementation (RFC 2661)
+ *
+ * This file supplies definitions required by the PPP over L2TP driver
+ * (pppol2tp.c).  All version information wrt this file is located in pppol2tp.c
+ *
+ * License:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef __LINUX_IF_PPPOL2TP_H
+#define __LINUX_IF_PPPOL2TP_H
+
+#include <asm/types.h>
+
+#ifdef __KERNEL__
+#include <linux/in.h>
+#endif
+
+/* Structure used to connect() the socket to a particular tunnel UDP
+ * socket.
+ */
+struct pppol2tp_addr
+{
+	pid_t	pid;			/* pid that owns the fd.
+					 * 0 => current */
+	int	fd;			/* FD of UDP socket to use */
+
+	struct sockaddr_in addr;	/* IP address and port to send to */
+
+	__be16 s_tunnel, s_session;	/* For matching incoming packets */
+	__be16 d_tunnel, d_session;	/* For sending outgoing packets */
+};
+
+/* Socket options:
+ * DEBUG	- bitmask of debug message categories
+ * SENDSEQ	- 0 => don't send packets with sequence numbers
+ *		  1 => send packets with sequence numbers
+ * RECVSEQ	- 0 => receive packet sequence numbers are optional
+ *		  1 => drop receive packets without sequence numbers
+ * LNSMODE	- 0 => act as LAC.
+ *		  1 => act as LNS.
+ * REORDERTO	- reorder timeout (in millisecs). If 0, don't try to reorder.
+ */
+enum {
+	PPPOL2TP_SO_DEBUG	= 1,
+	PPPOL2TP_SO_RECVSEQ	= 2,
+	PPPOL2TP_SO_SENDSEQ	= 3,
+	PPPOL2TP_SO_LNSMODE	= 4,
+	PPPOL2TP_SO_REORDERTO	= 5,
+};
+
+/* Debug message categories for the DEBUG socket option */
+enum {
+	PPPOL2TP_MSG_DEBUG	= (1 << 0),	/* verbose debug (if
+						 * compiled in) */
+	PPPOL2TP_MSG_CONTROL	= (1 << 1),	/* userspace - kernel
+						 * interface */
+	PPPOL2TP_MSG_SEQ	= (1 << 2),	/* sequence numbers */
+	PPPOL2TP_MSG_DATA	= (1 << 3),	/* data packets */
+};
+
+
+
+#endif
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 6f987be..2565254 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -27,6 +27,7 @@
 #include <asm/semaphore.h>
 #include <linux/ppp_channel.h>
 #endif /* __KERNEL__ */
+#include <linux/if_pppol2tp.h>
 
 /* For user-space programs to pick up these definitions
  * which they wouldn't get otherwise without defining __KERNEL__
@@ -50,8 +51,9 @@ struct pppoe_addr{
  * Protocols supported by AF_PPPOX 
  */ 
 #define PX_PROTO_OE    0 /* Currently just PPPoE */
-#define PX_MAX_PROTO   1	
- 
+#define PX_PROTO_OL2TP 1 /* Now L2TP also */
+#define PX_MAX_PROTO   2
+
 struct sockaddr_pppox { 
        sa_family_t     sa_family;            /* address family, AF_PPPOX */ 
        unsigned int    sa_protocol;          /* protocol identifier */ 
@@ -60,6 +62,16 @@ struct sockaddr_pppox {
        }sa_addr; 
 }__attribute__ ((packed)); 
 
+/* The use of the above union isn't viable because the size of this
+ * struct must stay fixed over time -- applications use sizeof(struct
+ * sockaddr_pppox) to fill it. We use a protocol specific sockaddr
+ * type instead.
+ */
+struct sockaddr_pppol2tp {
+	sa_family_t     sa_family;      /* address family, AF_PPPOX */
+	unsigned int    sa_protocol;    /* protocol identifier */
+	struct pppol2tp_addr pppol2tp;
+}__attribute__ ((packed));
 
 /*********************************************************************
  *
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 6e7c948..fe195c9 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -287,6 +287,7 @@ struct ucred {
 #define SOL_NETLINK	270
 #define SOL_TIPC	271
 #define SOL_RXRPC	272
+#define SOL_PPPOL2TP	273
 
 /* IPX options */
 #define IPX_TYPE	1
-- 
cgit v0.10.2


From 3557baabf28088f49bdf72a048fd33ab62e205b1 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Wed, 27 Jun 2007 15:49:24 -0700
Subject: [L2TP]: PPP over L2TP driver core

This driver handles only L2TP data frames; control frames are handled
by a userspace application. It implements L2TP using the PPPoX socket
family. There is a PPPoX socket for each L2TP session in an L2TP
tunnel.  PPP data within each session is passed through the kernel's
PPP subsystem via this driver. Kernel parameters of each socket can be
read or modified using ioctl() or [gs]etsockopt() calls.

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index b941c74..c251cca 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2784,6 +2784,19 @@ config PPPOATM
 	  which can lead to bad results if the ATM peer loses state and
 	  changes its encapsulation unilaterally.
 
+config PPPOL2TP
+	tristate "PPP over L2TP (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && PPP
+	help
+	  Support for PPP-over-L2TP socket family. L2TP is a protocol
+	  used by ISPs and enterprises to tunnel PPP traffic over UDP
+	  tunnels. L2TP is replacing PPTP for VPN uses.
+
+	  This kernel component handles only L2TP data packets: a
+	  userland daemon handles the L2TP control protocol (tunnel
+	  and session setup). One such daemon is OpenL2TP
+	  (http://openl2tp.sourceforge.net/).
+
 config SLIP
 	tristate "SLIP (serial line) support"
 	---help---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 1bbcbed..a2241e6 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -121,6 +121,7 @@ obj-$(CONFIG_PPP_DEFLATE) += ppp_deflate.o
 obj-$(CONFIG_PPP_BSDCOMP) += bsd_comp.o
 obj-$(CONFIG_PPP_MPPE) += ppp_mppe.o
 obj-$(CONFIG_PPPOE) += pppox.o pppoe.o
+obj-$(CONFIG_PPPOL2TP) += pppox.o pppol2tp.o
 
 obj-$(CONFIG_SLIP) += slip.o
 obj-$(CONFIG_SLHC) += slhc.o
diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
new file mode 100644
index 0000000..8a6bff5
--- /dev/null
+++ b/drivers/net/pppol2tp.c
@@ -0,0 +1,2486 @@
+/*****************************************************************************
+ * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
+ *
+ * PPPoX    --- Generic PPP encapsulation socket family
+ * PPPoL2TP --- PPP over L2TP (RFC 2661)
+ *
+ * Version:	1.0.0
+ *
+ * Authors:	Martijn van Oosterhout <kleptog@svana.org>
+ *		James Chapman (jchapman@katalix.com)
+ * Contributors:
+ *		Michal Ostrowski <mostrows@speakeasy.net>
+ *		Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
+ *		David S. Miller (davem@redhat.com)
+ *
+ * License:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* This driver handles only L2TP data frames; control frames are handled by a
+ * userspace application.
+ *
+ * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
+ * attaches it to a bound UDP socket with local tunnel_id / session_id and
+ * peer tunnel_id / session_id set. Data can then be sent or received using
+ * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
+ * can be read or modified using ioctl() or [gs]etsockopt() calls.
+ *
+ * When a PPPoL2TP socket is connected with local and peer session_id values
+ * zero, the socket is treated as a special tunnel management socket.
+ *
+ * Here's example userspace code to create a socket for sending/receiving data
+ * over an L2TP session:-
+ *
+ *	struct sockaddr_pppol2tp sax;
+ *	int session_fd;
+ *	int err;
+ *
+ *	session_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+ *
+ *	sax.sa_family = AF_PPPOX;
+ *	sax.sa_protocol = PX_PROTO_OL2TP;
+ *	sax.pppol2tp.fd = tunnel_fd;	// bound UDP socket
+ *	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
+ *	sax.pppol2tp.addr.sin_port = addr->sin_port;
+ *	sax.pppol2tp.addr.sin_family = AF_INET;
+ *	sax.pppol2tp.s_tunnel  = tunnel_id;
+ *	sax.pppol2tp.s_session = session_id;
+ *	sax.pppol2tp.d_tunnel  = peer_tunnel_id;
+ *	sax.pppol2tp.d_session = peer_session_id;
+ *
+ *	err = connect(session_fd, (struct sockaddr *)&sax, sizeof(sax));
+ *
+ * A pppd plugin that allows PPP traffic to be carried over L2TP using
+ * this driver is available from the OpenL2TP project at
+ * http://openl2tp.sourceforge.net.
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/if_pppox.h>
+#include <linux/if_pppol2tp.h>
+#include <net/sock.h>
+#include <linux/ppp_channel.h>
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/file.h>
+#include <linux/hash.h>
+#include <linux/sort.h>
+#include <linux/proc_fs.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include <asm/byteorder.h>
+#include <asm/atomic.h>
+
+
+#define PPPOL2TP_DRV_VERSION	"V1.0"
+
+/* L2TP header constants */
+#define L2TP_HDRFLAG_T	   0x8000
+#define L2TP_HDRFLAG_L	   0x4000
+#define L2TP_HDRFLAG_S	   0x0800
+#define L2TP_HDRFLAG_O	   0x0200
+#define L2TP_HDRFLAG_P	   0x0100
+
+#define L2TP_HDR_VER_MASK  0x000F
+#define L2TP_HDR_VER	   0x0002
+
+/* Space for UDP, L2TP and PPP headers */
+#define PPPOL2TP_HEADER_OVERHEAD	40
+
+/* Just some random numbers */
+#define L2TP_TUNNEL_MAGIC	0x42114DDA
+#define L2TP_SESSION_MAGIC	0x0C04EB7D
+
+#define PPPOL2TP_HASH_BITS	4
+#define PPPOL2TP_HASH_SIZE	(1 << PPPOL2TP_HASH_BITS)
+
+/* Default trace flags */
+#define PPPOL2TP_DEFAULT_DEBUG_FLAGS	0
+
+#define PRINTK(_mask, _type, _lvl, _fmt, args...)			\
+	do {								\
+		if ((_mask) & (_type))					\
+			printk(_lvl "PPPOL2TP: " _fmt, ##args);		\
+	} while(0)
+
+/* Number of bytes to build transmit L2TP headers.
+ * Unfortunately the size is different depending on whether sequence numbers
+ * are enabled.
+ */
+#define PPPOL2TP_L2TP_HDR_SIZE_SEQ		10
+#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ		6
+
+struct pppol2tp_tunnel;
+
+/* Describes a session. It is the sk_user_data field in the PPPoL2TP
+ * socket. Contains information to identify incoming packets and transmit
+ * outgoing ones.
+ */
+struct pppol2tp_session
+{
+	int			magic;		/* should be
+						 * L2TP_SESSION_MAGIC */
+	int			owner;		/* pid that opened the socket */
+
+	struct sock		*sock;		/* Pointer to the session
+						 * PPPoX socket */
+	struct sock		*tunnel_sock;	/* Pointer to the tunnel UDP
+						 * socket */
+
+	struct pppol2tp_addr	tunnel_addr;	/* Description of tunnel */
+
+	struct pppol2tp_tunnel	*tunnel;	/* back pointer to tunnel
+						 * context */
+
+	char			name[20];	/* "sess xxxxx/yyyyy", where
+						 * x=tunnel_id, y=session_id */
+	int			mtu;
+	int			mru;
+	int			flags;		/* accessed by PPPIOCGFLAGS.
+						 * Unused. */
+	unsigned		recv_seq:1;	/* expect receive packets with
+						 * sequence numbers? */
+	unsigned		send_seq:1;	/* send packets with sequence
+						 * numbers? */
+	unsigned		lns_mode:1;	/* behave as LNS? LAC enables
+						 * sequence numbers under
+						 * control of LNS. */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	int			reorder_timeout; /* configured reorder timeout
+						  * (in jiffies) */
+	u16			nr;		/* session NR state (receive) */
+	u16			ns;		/* session NS state (send) */
+	struct sk_buff_head	reorder_q;	/* receive reorder queue */
+	struct pppol2tp_ioc_stats stats;
+	struct hlist_node	hlist;		/* Hash list node */
+};
+
+/* The sk_user_data field of the tunnel's UDP socket. It contains info to track
+ * all the associated sessions so incoming packets can be sorted out
+ */
+struct pppol2tp_tunnel
+{
+	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
+	rwlock_t		hlist_lock;	/* protect session_hlist */
+	struct hlist_head	session_hlist[PPPOL2TP_HASH_SIZE];
+						/* hashed list of sessions,
+						 * hashed by id */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	char			name[12];	/* "tunl xxxxx" */
+	struct pppol2tp_ioc_stats stats;
+
+	void (*old_sk_destruct)(struct sock *);
+
+	struct sock		*sock;		/* Parent socket */
+	struct list_head	list;		/* Keep a list of all open
+						 * prepared sockets */
+
+	atomic_t		ref_count;
+};
+
+/* Private data stored for received packets in the skb.
+ */
+struct pppol2tp_skb_cb {
+	u16			ns;
+	u16			nr;
+	u16			has_seq;
+	u16			length;
+	unsigned long		expires;
+};
+
+#define PPPOL2TP_SKB_CB(skb)	((struct pppol2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
+
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
+static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel);
+
+static atomic_t pppol2tp_tunnel_count;
+static atomic_t pppol2tp_session_count;
+static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL };
+static struct proto_ops pppol2tp_ops;
+static LIST_HEAD(pppol2tp_tunnel_list);
+static DEFINE_RWLOCK(pppol2tp_tunnel_list_lock);
+
+/* Helpers to obtain tunnel/session contexts from sockets.
+ */
+static inline struct pppol2tp_session *pppol2tp_sock_to_session(struct sock *sk)
+{
+	struct pppol2tp_session *session;
+
+	if (sk == NULL)
+		return NULL;
+
+	session = (struct pppol2tp_session *)(sk->sk_user_data);
+	if (session == NULL)
+		return NULL;
+
+	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+
+	return session;
+}
+
+static inline struct pppol2tp_tunnel *pppol2tp_sock_to_tunnel(struct sock *sk)
+{
+	struct pppol2tp_tunnel *tunnel;
+
+	if (sk == NULL)
+		return NULL;
+
+	tunnel = (struct pppol2tp_tunnel *)(sk->sk_user_data);
+	if (tunnel == NULL)
+		return NULL;
+
+	BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+
+	return tunnel;
+}
+
+/* Tunnel reference counts. Incremented per session that is added to
+ * the tunnel.
+ */
+static inline void pppol2tp_tunnel_inc_refcount(struct pppol2tp_tunnel *tunnel)
+{
+	atomic_inc(&tunnel->ref_count);
+}
+
+static inline void pppol2tp_tunnel_dec_refcount(struct pppol2tp_tunnel *tunnel)
+{
+	if (atomic_dec_and_test(&tunnel->ref_count))
+		pppol2tp_tunnel_free(tunnel);
+}
+
+/* Session hash list.
+ * The session_id SHOULD be random according to RFC2661, but several
+ * L2TP implementations (Cisco and Microsoft) use incrementing
+ * session_ids.  So we do a real hash on the session_id, rather than a
+ * simple bitmask.
+ */
+static inline struct hlist_head *
+pppol2tp_session_id_hash(struct pppol2tp_tunnel *tunnel, u16 session_id)
+{
+	unsigned long hash_val = (unsigned long) session_id;
+	return &tunnel->session_hlist[hash_long(hash_val, PPPOL2TP_HASH_BITS)];
+}
+
+/* Lookup a session by id
+ */
+static struct pppol2tp_session *
+pppol2tp_session_find(struct pppol2tp_tunnel *tunnel, u16 session_id)
+{
+	struct hlist_head *session_list =
+		pppol2tp_session_id_hash(tunnel, session_id);
+	struct pppol2tp_session *session;
+	struct hlist_node *walk;
+
+	read_lock(&tunnel->hlist_lock);
+	hlist_for_each_entry(session, walk, session_list, hlist) {
+		if (session->tunnel_addr.s_session == session_id) {
+			read_unlock(&tunnel->hlist_lock);
+			return session;
+		}
+	}
+	read_unlock(&tunnel->hlist_lock);
+
+	return NULL;
+}
+
+/* Lookup a tunnel by id
+ */
+static struct pppol2tp_tunnel *pppol2tp_tunnel_find(u16 tunnel_id)
+{
+	struct pppol2tp_tunnel *tunnel = NULL;
+
+	read_lock(&pppol2tp_tunnel_list_lock);
+	list_for_each_entry(tunnel, &pppol2tp_tunnel_list, list) {
+		if (tunnel->stats.tunnel_id == tunnel_id) {
+			read_unlock(&pppol2tp_tunnel_list_lock);
+			return tunnel;
+		}
+	}
+	read_unlock(&pppol2tp_tunnel_list_lock);
+
+	return NULL;
+}
+
+/*****************************************************************************
+ * Receive data handling
+ *****************************************************************************/
+
+/* Queue a skb in order. We come here only if the skb has an L2TP sequence
+ * number.
+ */
+static void pppol2tp_recv_queue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
+{
+	struct sk_buff *skbp;
+	u16 ns = PPPOL2TP_SKB_CB(skb)->ns;
+
+	spin_lock(&session->reorder_q.lock);
+	skb_queue_walk(&session->reorder_q, skbp) {
+		if (PPPOL2TP_SKB_CB(skbp)->ns > ns) {
+			__skb_insert(skb, skbp->prev, skbp, &session->reorder_q);
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+			       "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
+			       session->name, ns, PPPOL2TP_SKB_CB(skbp)->ns,
+			       skb_queue_len(&session->reorder_q));
+			session->stats.rx_oos_packets++;
+			goto out;
+		}
+	}
+
+	__skb_queue_tail(&session->reorder_q, skb);
+
+out:
+	spin_unlock(&session->reorder_q.lock);
+}
+
+/* Dequeue a single skb.
+ */
+static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
+{
+	struct pppol2tp_tunnel *tunnel = session->tunnel;
+	int length = PPPOL2TP_SKB_CB(skb)->length;
+	struct sock *session_sock = NULL;
+
+	/* We're about to requeue the skb, so unlink it and return resources
+	 * to its current owner (a socket receive buffer).
+	 */
+	skb_unlink(skb, &session->reorder_q);
+	skb_orphan(skb);
+
+	tunnel->stats.rx_packets++;
+	tunnel->stats.rx_bytes += length;
+	session->stats.rx_packets++;
+	session->stats.rx_bytes += length;
+
+	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
+		/* Bump our Nr */
+		session->nr++;
+		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: updated nr to %hu\n", session->name, session->nr);
+	}
+
+	/* If the socket is bound, send it in to PPP's input queue. Otherwise
+	 * queue it on the session socket.
+	 */
+	session_sock = session->sock;
+	if (session_sock->sk_state & PPPOX_BOUND) {
+		struct pppox_sock *po;
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: recv %d byte data frame, passing to ppp\n",
+		       session->name, length);
+
+		/* We need to forget all info related to the L2TP packet
+		 * gathered in the skb as we are going to reuse the same
+		 * skb for the inner packet.
+		 * Namely we need to:
+		 * - reset xfrm (IPSec) information as it applies to
+		 *   the outer L2TP packet and not to the inner one
+		 * - release the dst to force a route lookup on the inner
+		 *   IP packet since skb->dst currently points to the dst
+		 *   of the UDP tunnel
+		 * - reset netfilter information as it doesn't apply
+		 *   to the inner packet either
+		 */
+		secpath_reset(skb);
+		dst_release(skb->dst);
+		skb->dst = NULL;
+		nf_reset(skb);
+
+		po = pppox_sk(session_sock);
+		ppp_input(&po->chan, skb);
+	} else {
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: socket not bound\n", session->name);
+
+		/* Not bound. Nothing we can do, so discard. */
+		session->stats.rx_errors++;
+		kfree_skb(skb);
+	}
+
+	sock_put(session->sock);
+}
+
+/* Dequeue skbs from the session's reorder_q, subject to packet order.
+ * Skbs that have been in the queue for too long are simply discarded.
+ *
+ * Every frame accepted by pppol2tp_recv_core() holds one reference on
+ * session->sock (taken with sock_hold() before the frame is queued).
+ * That reference is released on the delivery path, so it must also be
+ * released here when a frame is thrown away instead of delivered;
+ * otherwise the session socket's refcount never drops back.
+ *
+ * NOTE(review): the queue lock is dropped while a frame is delivered and
+ * the walk then continues from the cached next pointer; confirm nothing
+ * else can unlink that entry in the meantime.
+ */
+static void pppol2tp_recv_dequeue(struct pppol2tp_session *session)
+{
+	struct sk_buff *skb;
+	struct sk_buff *tmp;
+
+	/* If the pkt at the head of the queue has the nr that we
+	 * expect to send up next, dequeue it and any other
+	 * in-sequence packets behind it.
+	 */
+	spin_lock(&session->reorder_q.lock);
+	skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
+		if (time_after(jiffies, PPPOL2TP_SKB_CB(skb)->expires)) {
+			session->stats.rx_seq_discards++;
+			session->stats.rx_errors++;
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+			       "%s: oos pkt %hu len %d discarded (too old), "
+			       "waiting for %hu, reorder_q_len=%d\n",
+			       session->name, PPPOL2TP_SKB_CB(skb)->ns,
+			       PPPOL2TP_SKB_CB(skb)->length, session->nr,
+			       skb_queue_len(&session->reorder_q));
+			__skb_unlink(skb, &session->reorder_q);
+			kfree_skb(skb);
+			/* Drop the socket reference held for this frame */
+			sock_put(session->sock);
+			continue;
+		}
+
+		/* A sequenced frame that is not the next one expected stops
+		 * the walk: everything behind it has to wait as well.
+		 */
+		if (PPPOL2TP_SKB_CB(skb)->has_seq) {
+			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
+				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+				       "%s: holding oos pkt %hu len %d, "
+				       "waiting for %hu, reorder_q_len=%d\n",
+				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
+				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
+				       skb_queue_len(&session->reorder_q));
+				goto out;
+			}
+		}
+
+		/* Deliver without holding the queue lock */
+		spin_unlock(&session->reorder_q.lock);
+		pppol2tp_recv_dequeue_skb(session, skb);
+		spin_lock(&session->reorder_q.lock);
+	}
+
+out:
+	spin_unlock(&session->reorder_q.lock);
+}
+
+/* Internal receive frame. Do the real work of receiving an L2TP data frame
+ * here. The skb is not on a list when we get here.
+ *
+ * @sock: tunnel (UDP) socket the datagram arrived on.
+ * @skb:  received datagram; skb->data points at the UDP header.
+ *
+ * Returns 0 if the skb was consumed here: it was either queued for
+ * delivery as a data frame, or freed because of a sequencing or framing
+ * error.
+ * Returns 1 if the packet was not a good data packet and could not be
+ * forwarded.  The skb is left untouched in that case so that such packets
+ * can be passed up to userspace to deal with.
+ *
+ * NOTE(review): header fields are read directly through skb->data, which
+ * assumes they live in the linear part of the skb - confirm.
+ */
+static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb)
+{
+	struct pppol2tp_session *session = NULL;
+	struct pppol2tp_tunnel *tunnel;
+	unsigned char *ptr;
+	u16 hdrflags;
+	u16 tunnel_id, session_id;
+	int length;
+	struct udphdr *uh;
+
+	tunnel = pppol2tp_sock_to_tunnel(sock);
+	if (tunnel == NULL)
+		goto error;
+
+	/* Short packet? We need the UDP header plus at least the 16-bit
+	 * L2TP flags word that is read below.
+	 */
+	if (skb->len < sizeof(struct udphdr) + sizeof(__be16)) {
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
+		goto error;
+	}
+
+	/* Point to L2TP header */
+	ptr = skb->data + sizeof(struct udphdr);
+
+	/* Get L2TP header flags */
+	hdrflags = ntohs(*(__be16*)ptr);
+
+	/* Trace packet contents, if enabled. Dump at most 16 bytes and
+	 * never more than the packet actually contains.
+	 */
+	if (tunnel->debug & PPPOL2TP_MSG_DATA) {
+		int avail = skb->len - sizeof(struct udphdr);
+
+		printk(KERN_DEBUG "%s: recv: ", tunnel->name);
+
+		for (length = 0; length < 16 && length < avail; length++)
+			printk(" %02X", ptr[length]);
+		printk("\n");
+	}
+
+	/* Get length of L2TP packet */
+	uh = (struct udphdr *) skb_transport_header(skb);
+	length = ntohs(uh->len) - sizeof(struct udphdr);
+
+	/* Too short? */
+	if (length < 12) {
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: recv short L2TP packet (len=%d)\n", tunnel->name, length);
+		goto error;
+	}
+
+	/* If type is control packet, it is handled by userspace. */
+	if (hdrflags & L2TP_HDRFLAG_T) {
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: recv control packet, len=%d\n", tunnel->name, length);
+		goto error;
+	}
+
+	/* Skip flags */
+	ptr += 2;
+
+	/* If length is present, skip it */
+	if (hdrflags & L2TP_HDRFLAG_L)
+		ptr += 2;
+
+	/* Extract tunnel and session ID */
+	tunnel_id = ntohs(*(__be16 *) ptr);
+	ptr += 2;
+	session_id = ntohs(*(__be16 *) ptr);
+	ptr += 2;
+
+	/* Find the session context */
+	session = pppol2tp_session_find(tunnel, session_id);
+	if (!session) {
+		/* Not found? Pass to userspace to deal with */
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: no socket found (%hu/%hu). Passing up.\n",
+		       tunnel->name, tunnel_id, session_id);
+		goto error;
+	}
+	sock_hold(session->sock);
+
+	/* The ref count on the socket was increased by the above call since
+	 * we now hold a pointer to the session. Take care to do sock_put()
+	 * when exiting this function from now on, unless the skb has been
+	 * queued (the reference then travels with the queued skb).
+	 */
+
+	/* Handle the optional sequence numbers.  If we are the LAC,
+	 * enable/disable sequence numbers under the control of the LNS.  If
+	 * no sequence numbers present but we were expecting them, discard
+	 * frame.
+	 */
+	if (hdrflags & L2TP_HDRFLAG_S) {
+		u16 ns, nr;
+		ns = ntohs(*(__be16 *) ptr);
+		ptr += 2;
+		nr = ntohs(*(__be16 *) ptr);
+		ptr += 2;
+
+		/* Received a packet with sequence numbers. If we're the LAC
+		 * (not in LNS mode), check if we are sending sequence
+		 * numbers and if not, configure it so.
+		 */
+		if ((!session->lns_mode) && (!session->send_seq)) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
+			       "%s: requested to enable seq numbers by LNS\n",
+			       session->name);
+			session->send_seq = -1;
+		}
+
+		/* Store L2TP info in the skb */
+		PPPOL2TP_SKB_CB(skb)->ns = ns;
+		PPPOL2TP_SKB_CB(skb)->nr = nr;
+		PPPOL2TP_SKB_CB(skb)->has_seq = 1;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n",
+		       session->name, ns, nr, session->nr);
+	} else {
+		/* No sequence numbers.
+		 * If user has configured mandatory sequence numbers, discard.
+		 */
+		if (session->recv_seq) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			session->stats.rx_errors++;
+			goto discard;
+		}
+
+		/* If we're the LAC and we're sending sequence numbers, the
+		 * LNS has requested that we no longer send sequence numbers.
+		 * If we're the LNS and we're sending sequence numbers, the
+		 * LAC is broken. Discard the frame.
+		 */
+		if ((!session->lns_mode) && (session->send_seq)) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
+			       "%s: requested to disable seq numbers by LNS\n",
+			       session->name);
+			session->send_seq = 0;
+		} else if (session->send_seq) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			session->stats.rx_errors++;
+			goto discard;
+		}
+
+		/* Store L2TP info in the skb */
+		PPPOL2TP_SKB_CB(skb)->has_seq = 0;
+	}
+
+	/* If offset bit set, skip the offset size field and the padding it
+	 * describes. The value comes straight off the wire, so make sure
+	 * the field itself is inside the packet before reading it.
+	 */
+	if (hdrflags & L2TP_HDRFLAG_O) {
+		if ((ptr + 2) > (skb->data + skb->len))
+			goto malformed;
+		ptr += 2 + ntohs(*(__be16 *) ptr);
+	}
+
+	/* Strip the UDP and L2TP headers. skb_pull() returns NULL and
+	 * leaves the skb unchanged when asked to pull more than skb->len,
+	 * which would otherwise hand the complete UDP datagram to PPP.
+	 */
+	if (skb_pull(skb, ptr - skb->data) == NULL)
+		goto malformed;
+
+	/* Skip PPP header, if present.	 In testing, Microsoft L2TP clients
+	 * don't send the PPP header (PPP header compression enabled), but
+	 * other clients can include the header. So we cope with both cases
+	 * here. The PPP header is always FF03 when using L2TP.
+	 *
+	 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
+	 * the field may be unaligned.
+	 */
+	if ((skb->len >= 2) &&
+	    (skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+		skb_pull(skb, 2);
+
+	/* Prepare skb for adding to the session's reorder_q.  Hold
+	 * packets for max reorder_timeout or 1 second if not
+	 * reordering.
+	 */
+	PPPOL2TP_SKB_CB(skb)->length = length;
+	PPPOL2TP_SKB_CB(skb)->expires = jiffies +
+		(session->reorder_timeout ? session->reorder_timeout : HZ);
+
+	/* Add packet to the session's receive queue. Reordering is done here, if
+	 * enabled. Saved L2TP protocol info is kept with the skb and accessed
+	 * through PPPOL2TP_SKB_CB().
+	 */
+	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
+		if (session->reorder_timeout != 0) {
+			/* Packet reordering enabled. Add skb to session's
+			 * reorder queue, in order of ns.
+			 */
+			pppol2tp_recv_queue_skb(session, skb);
+		} else {
+			/* Packet reordering disabled. Discard out-of-sequence
+			 * packets
+			 */
+			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
+				session->stats.rx_seq_discards++;
+				session->stats.rx_errors++;
+				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+				       "%s: oos pkt %hu len %d discarded, "
+				       "waiting for %hu, reorder_q_len=%d\n",
+				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
+				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
+				       skb_queue_len(&session->reorder_q));
+				goto discard;
+			}
+			skb_queue_tail(&session->reorder_q, skb);
+		}
+	} else {
+		/* No sequence numbers. Add the skb to the tail of the
+		 * reorder queue. This ensures that it will be
+		 * delivered after all previous sequenced skbs.
+		 */
+		skb_queue_tail(&session->reorder_q, skb);
+	}
+
+	/* Try to dequeue as many skbs from reorder_q as we can. */
+	pppol2tp_recv_dequeue(session);
+
+	return 0;
+
+malformed:
+	/* L2TP header runs past the end of the received data */
+	PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_WARNING,
+	       "%s: recv data header exceeds packet length. Discarding\n",
+	       session->name);
+	session->stats.rx_errors++;
+	/* fall through */
+
+discard:
+	kfree_skb(skb);
+	sock_put(session->sock);
+
+	return 0;
+
+error:
+	return 1;
+}
+
+/* UDP encapsulation receive hook for the tunnel socket.
+ * See net/ipv4/udp.c.
+ *
+ * Return codes:
+ * 0 : success.
+ * <0: error
+ * >0: skb should be passed up to userspace as UDP.
+ *
+ * This handler itself only ever returns 0 or 1.
+ */
+static int pppol2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct pppol2tp_tunnel *tunnel = pppol2tp_sock_to_tunnel(sk);
+
+	/* No tunnel context on this socket: hand the skb back to UDP. */
+	if (tunnel == NULL)
+		return 1;
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+	       "%s: received %d bytes\n", tunnel->name, skb->len);
+
+	/* Any non-zero result from the core means the skb was not
+	 * consumed and must be passed up.
+	 */
+	return pppol2tp_recv_core(sk, skb) ? 1 : 0;
+}
+
+/* Receive message. This is the recvmsg for the PPPoL2TP socket.
+ *
+ * @iocb:  unused.
+ * @sock:  PPPoL2TP socket being read.
+ * @msg:   destination message; msg_namelen is reset to 0 and MSG_TRUNC is
+ *         set in msg_flags when the datagram did not fit.
+ * @len:   size of the caller's buffer.
+ * @flags: MSG_* flags; MSG_DONTWAIT selects a non-blocking dequeue.
+ *
+ * Returns the number of bytes copied, -EIO if the socket is bound to a
+ * PPP channel, or the error reported by skb_recv_datagram() or
+ * memcpy_toiovec().
+ *
+ * At most @len bytes are copied. Copying skb->len bytes unconditionally
+ * would write more than the caller asked for whenever the queued
+ * datagram is larger than the supplied buffer.
+ */
+static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
+			    struct msghdr *msg, size_t len,
+			    int flags)
+{
+	int err;
+	struct sk_buff *skb;
+	struct sock *sk = sock->sk;
+
+	err = -EIO;
+	if (sk->sk_state & PPPOX_BOUND)
+		goto end;
+
+	msg->msg_namelen = 0;
+
+	err = 0;
+	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+				flags & MSG_DONTWAIT, &err);
+	if (skb) {
+		/* Never copy more than the caller has room for */
+		if (len > skb->len)
+			len = skb->len;
+		else if (len < skb->len)
+			msg->msg_flags |= MSG_TRUNC;
+
+		err = memcpy_toiovec(msg->msg_iov, (unsigned char *) skb->data,
+				     len);
+		if (err < 0)
+			goto do_skb_free;
+		err = len;
+	}
+do_skb_free:
+	/* skb is NULL here when nothing was dequeued */
+	kfree_skb(skb);
+end:
+	return err;
+}
+
+/************************************************************************
+ * Transmit handling
+ ***********************************************************************/
+
+/* Size in bytes of the L2TP header this session currently emits.
+ * The header is longer when the session is sending sequence numbers.
+ */
+static inline int pppol2tp_l2tp_header_len(struct pppol2tp_session *session)
+{
+	return session->send_seq ? PPPOL2TP_L2TP_HDR_SIZE_SEQ
+				 : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+}
+
+/* Write the session's L2TP data header into @buf.
+ *
+ * Three 16-bit words are always written (flags, destination tunnel id,
+ * destination session id). When the session sends sequence numbers, two
+ * more follow (Ns and a zero Nr) and the session's Ns is advanced.
+ * The caller must supply a buffer of at least
+ * pppol2tp_l2tp_header_len(session) bytes.
+ */
+static void pppol2tp_build_l2tp_header(struct pppol2tp_session *session,
+				       void *buf)
+{
+	__be16 *hdr = buf;
+	u16 flags = L2TP_HDR_VER;
+
+	if (session->send_seq)
+		flags |= L2TP_HDRFLAG_S;
+
+	/* Setup L2TP header.
+	 * FIXME: Can this ever be unaligned? Is direct dereferencing of
+	 * 16-bit header fields safe here for all architectures?
+	 */
+	hdr[0] = htons(flags);
+	hdr[1] = htons(session->tunnel_addr.d_tunnel);
+	hdr[2] = htons(session->tunnel_addr.d_session);
+
+	/* Nothing more to add unless sequence numbers are in use */
+	if (!session->send_seq)
+		return;
+
+	hdr[3] = htons(session->ns);
+	hdr[4] = 0;
+	session->ns++;
+	PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+	       "%s: updated ns to %hu\n", session->name, session->ns);
+}
+
+/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket.  We come here
+ * when a user application does a sendmsg() on the session socket. L2TP and
+ * PPP headers must be inserted into the user's data.
+ *
+ * @iocb:      unused.
+ * @sock:      session socket.
+ * @m:         user message; its iovec supplies the PPP payload.
+ * @total_len: number of payload bytes to send.
+ *
+ * Returns the result of ip_queue_xmit(), or a negative errno:
+ * -ENOTCONN if the socket is dead or not connected, -EBADF if the
+ * session or tunnel context cannot be found, -ENOMEM if no skb could be
+ * allocated, or the error from copying the user data.
+ *
+ * The frame is laid out as UDP header, L2TP header, PPP header, payload.
+ * Each part is written at the pointer returned by skb_put(): skb_put()
+ * extends the tail of the buffer and does not move skb->data, so writing
+ * every part at skb->data would place them all on top of the UDP header.
+ *
+ * NOTE(review): the checksum computed below is not stored in uh->check,
+ * and the skb is owned by the session socket rather than the tunnel
+ * socket when it is handed to ip_queue_xmit() - compare pppol2tp_xmit()
+ * and confirm the intended behaviour.
+ */
+static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+			    size_t total_len)
+{
+	static const unsigned char ppph[2] = { 0xff, 0x03 };
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet;
+	__wsum csum = 0;
+	struct sk_buff *skb;
+	int error;
+	int hdr_len;
+	unsigned int len;
+	unsigned char *ppp;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	struct udphdr *uh;
+
+	error = -ENOTCONN;
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
+		goto error;
+
+	/* Get session and tunnel contexts */
+	error = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto error;
+
+	tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+	if (tunnel == NULL)
+		goto error;
+
+	/* What header length is configured for this session? */
+	hdr_len = pppol2tp_l2tp_header_len(session);
+
+	/* Allocate a socket buffer */
+	error = -ENOMEM;
+	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
+			   sizeof(struct udphdr) + hdr_len +
+			   sizeof(ppph) + total_len,
+			   0, GFP_KERNEL);
+	if (!skb)
+		goto error;
+
+	/* Reserve space for headers. */
+	skb_reserve(skb, NET_SKB_PAD);
+	skb_reset_network_header(skb);
+	skb_reserve(skb, sizeof(struct iphdr));
+	skb_reset_transport_header(skb);
+
+	/* Build UDP header. The UDP length field covers the UDP header
+	 * itself as well as everything that follows it.
+	 */
+	inet = inet_sk(session->tunnel_sock);
+	uh = (struct udphdr *) skb_put(skb, sizeof(struct udphdr));
+	uh->source = inet->sport;
+	uh->dest = inet->dport;
+	uh->len = htons(sizeof(struct udphdr) + hdr_len +
+			sizeof(ppph) + total_len);
+	uh->check = 0;
+
+	/* Build L2TP header directly behind the UDP header */
+	pppol2tp_build_l2tp_header(session, skb_put(skb, hdr_len));
+
+	/* Add PPP header */
+	ppp = skb_put(skb, sizeof(ppph));
+	ppp[0] = ppph[0];
+	ppp[1] = ppph[1];
+
+	/* Copy user data into skb, behind the PPP header */
+	error = memcpy_fromiovec(skb_put(skb, total_len), m->msg_iov,
+				 total_len);
+	if (error < 0) {
+		kfree_skb(skb);
+		goto error;
+	}
+
+	/* Calculate UDP checksum if configured to do so */
+	if (session->tunnel_sock->sk_no_check != UDP_CSUM_NOXMIT)
+		csum = udp_csum_outgoing(sk, skb);
+
+	/* Debug */
+	if (session->send_seq)
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %d bytes, ns=%hu\n", session->name,
+		       total_len, session->ns - 1);
+	else
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %d bytes\n", session->name, total_len);
+
+	if (session->debug & PPPOL2TP_MSG_DATA) {
+		int i;
+		unsigned char *datap = skb->data;
+
+		printk(KERN_DEBUG "%s: xmit:", session->name);
+		for (i = 0; i < total_len; i++) {
+			printk(" %02X", *datap++);
+			if (i == 15) {
+				printk(" ...");
+				break;
+			}
+		}
+		printk("\n");
+	}
+
+	/* Queue the packet to IP for output. The skb belongs to the IP
+	 * layer after this call and may already have been freed when it
+	 * returns, so remember its length for the statistics first.
+	 */
+	len = skb->len;
+	error = ip_queue_xmit(skb, 1);
+
+	/* Update stats */
+	if (error >= 0) {
+		tunnel->stats.tx_packets++;
+		tunnel->stats.tx_bytes += len;
+		session->stats.tx_packets++;
+		session->stats.tx_bytes += len;
+	} else {
+		tunnel->stats.tx_errors++;
+		session->stats.tx_errors++;
+	}
+
+error:
+	return error;
+}
+
+/* Transmit function called by generic PPP driver.  Sends PPP frame
+ * over PPPoL2TP socket.
+ *
+ * This is almost the same as pppol2tp_sendmsg(), but rather than
+ * being called with a msghdr from userspace, it is called with a skb
+ * from the kernel.
+ *
+ * The supplied skb from ppp may not have enough headroom for the
+ * insertion of L2TP, UDP and IP headers, in which case a new skb with
+ * more headroom is allocated. We must be careful in the error case
+ * because the caller will expect to free the skb it supplied, not our
+ * new skb. So we take care to always leave the original skb unfreed if
+ * we return an error.
+ *
+ * @chan: PPP channel; chan->private is the session socket.
+ * @skb:  PPP frame to send.
+ *
+ * Returns 1 when the frame has been handed to the IP layer (the original
+ * skb has been disposed of), or 0 when nothing was sent and @skb is
+ * still owned by the caller.
+ *
+ * NOTE(review): the checksum computed below is never stored in
+ * uh->check - confirm whether that is intended.
+ */
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+{
+	static const u8 ppph[2] = { 0xff, 0x03 };
+	struct sock *sk = (struct sock *) chan->private;
+	struct sock *sk_tun;
+	int hdr_len;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	int rc;
+	int headroom;
+	int data_len = skb->len;
+	unsigned int len;
+	struct inet_sock *inet;
+	__wsum csum = 0;
+	struct sk_buff *skb2 = NULL;
+	struct udphdr *uh;
+
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
+		goto abort;
+
+	/* Get session and tunnel contexts from the socket */
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto abort;
+
+	sk_tun = session->tunnel_sock;
+	if (sk_tun == NULL)
+		goto abort;
+	tunnel = pppol2tp_sock_to_tunnel(sk_tun);
+	if (tunnel == NULL)
+		goto abort;
+
+	/* What header length is configured for this session? */
+	hdr_len = pppol2tp_l2tp_header_len(session);
+
+	/* Check that there's enough headroom in the skb to insert IP,
+	 * UDP and L2TP and PPP headers. If not enough, expand it to
+	 * make room. Note that a new skb (or a clone) is
+	 * allocated. If we return an error from this point on, make
+	 * sure we free the new skb but do not free the original skb
+	 * since that is done by the caller for the error case.
+	 */
+	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
+		sizeof(struct udphdr) + hdr_len + sizeof(ppph);
+	if (skb_headroom(skb) < headroom) {
+		skb2 = skb_realloc_headroom(skb, headroom);
+		if (skb2 == NULL)
+			goto abort;
+	} else
+		skb2 = skb;
+
+	/* Check that the socket has room */
+	if (atomic_read(&sk_tun->sk_wmem_alloc) < sk_tun->sk_sndbuf)
+		skb_set_owner_w(skb2, sk_tun);
+	else
+		goto discard;
+
+	/* Setup PPP header */
+	skb_push(skb2, sizeof(ppph));
+	skb2->data[0] = ppph[0];
+	skb2->data[1] = ppph[1];
+
+	/* Setup L2TP header */
+	skb_push(skb2, hdr_len);
+	pppol2tp_build_l2tp_header(session, skb2->data);
+
+	/* Setup UDP header */
+	inet = inet_sk(sk_tun);
+	skb_push(skb2, sizeof(struct udphdr));
+	skb_reset_transport_header(skb2);
+	uh = (struct udphdr *) skb2->data;
+	uh->source = inet->sport;
+	uh->dest = inet->dport;
+	uh->len = htons(sizeof(struct udphdr) + hdr_len + sizeof(ppph) + data_len);
+	uh->check = 0;
+
+	/* Calculate UDP checksum if configured to do so */
+	if (sk_tun->sk_no_check != UDP_CSUM_NOXMIT)
+		csum = udp_csum_outgoing(sk_tun, skb2);
+
+	/* Debug */
+	if (session->send_seq)
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %d bytes, ns=%hu\n", session->name,
+		       data_len, session->ns - 1);
+	else
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %d bytes\n", session->name, data_len);
+
+	if (session->debug & PPPOL2TP_MSG_DATA) {
+		int i;
+		unsigned char *datap = skb2->data;
+
+		printk(KERN_DEBUG "%s: xmit:", session->name);
+		for (i = 0; i < data_len; i++) {
+			printk(" %02X", *datap++);
+			if (i == 31) {
+				printk(" ...");
+				break;
+			}
+		}
+		printk("\n");
+	}
+
+	/* Get routing info from the tunnel socket */
+	skb2->dst = sk_dst_get(sk_tun);
+
+	/* Queue the packet to IP for output. skb2 belongs to the IP
+	 * layer after this call and may already have been freed when it
+	 * returns, so remember its length for the statistics first.
+	 */
+	len = skb2->len;
+	rc = ip_queue_xmit(skb2, 1);
+
+	/* Update stats */
+	if (rc >= 0) {
+		tunnel->stats.tx_packets++;
+		tunnel->stats.tx_bytes += len;
+		session->stats.tx_packets++;
+		session->stats.tx_bytes += len;
+	} else {
+		tunnel->stats.tx_errors++;
+		session->stats.tx_errors++;
+	}
+
+	/* Free the original skb, but only if a separate skb was sent.
+	 * When there was enough headroom skb2 is the original skb, which
+	 * has just been given to ip_queue_xmit() and must not be freed a
+	 * second time.
+	 */
+	if (skb2 != skb)
+		kfree_skb(skb);
+
+	return 1;
+
+discard:
+	/* Free the new skb. Caller will free original skb. */
+	if (skb2 != skb)
+		kfree_skb(skb2);
+abort:
+	return 0;
+}
+
+/*****************************************************************************
+ * Session (and tunnel control) socket create/destroy.
+ *****************************************************************************/
+
+/* When the tunnel UDP socket is closed, all the attached sockets need to go
+ * too.
+ *
+ * Walks every hash chain of the tunnel, removes each session from the
+ * hash, unbinds its socket from PPP, marks it PPPOX_DEAD and drops any
+ * queued data.
+ *
+ * Frames waiting on a session's reorder_q each hold a reference on the
+ * session socket (taken in pppol2tp_recv_core() when the frame was
+ * accepted), so they are released one at a time together with their
+ * reference rather than with skb_queue_purge(), which would free the
+ * frames but leave the references behind.
+ */
+static void pppol2tp_tunnel_closeall(struct pppol2tp_tunnel *tunnel)
+{
+	int hash;
+	struct hlist_node *walk;
+	struct hlist_node *tmp;
+	struct pppol2tp_session *session;
+	struct sock *sk;
+	struct sk_buff *skb;
+
+	BUG_ON(tunnel == NULL);
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: closing all sessions...\n", tunnel->name);
+
+	write_lock(&tunnel->hlist_lock);
+	for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) {
+again:
+		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
+			session = hlist_entry(walk, struct pppol2tp_session, hlist);
+
+			sk = session->sock;
+
+			PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+			       "%s: closing session\n", session->name);
+
+			hlist_del_init(&session->hlist);
+
+			/* Since we should hold the sock lock while
+			 * doing any unbinding, we need to release the
+			 * lock we're holding before taking that lock.
+			 * Hold a reference to the sock so it doesn't
+			 * disappear as we're jumping between locks.
+			 */
+			sock_hold(sk);
+			write_unlock(&tunnel->hlist_lock);
+			lock_sock(sk);
+
+			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+				pppox_unbind_sock(sk);
+				sk->sk_state = PPPOX_DEAD;
+				sk->sk_state_change(sk);
+			}
+
+			/* Purge any queued data */
+			skb_queue_purge(&sk->sk_receive_queue);
+			skb_queue_purge(&sk->sk_write_queue);
+			while ((skb = skb_dequeue(&session->reorder_q))) {
+				kfree_skb(skb);
+				/* Reference held on behalf of this frame */
+				sock_put(sk);
+			}
+
+			release_sock(sk);
+			sock_put(sk);
+
+			/* Now restart from the beginning of this hash
+			 * chain.  We always remove a session from the
+			 * list so we are guaranteed to make forward
+			 * progress.
+			 */
+			write_lock(&tunnel->hlist_lock);
+			goto again;
+		}
+	}
+	write_unlock(&tunnel->hlist_lock);
+}
+
+/* Really kill the tunnel.
+ * Come here only when all sessions have been cleared from the tunnel.
+ *
+ * Unlinks the tunnel context from the global tunnel list, decrements the
+ * global tunnel counter and frees the context. The tunnel pointer must
+ * not be used after this returns.
+ */
+static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel)
+{
+	/* Remove from the global tunnel list, under the list's write lock */
+	write_lock(&pppol2tp_tunnel_list_lock);
+	list_del_init(&tunnel->list);
+	write_unlock(&pppol2tp_tunnel_list_lock);
+
+	atomic_dec(&pppol2tp_tunnel_count);
+	kfree(tunnel);
+}
+
+/* Destructor hooked onto the tunnel's UDP socket.
+ *
+ * Closes every session on the tunnel, detaches the tunnel from the
+ * socket, chains to the socket's original destructor and finally drops
+ * the tunnel reference. The tunnel context is deleted only when all
+ * session sockets have been closed.
+ */
+static void pppol2tp_tunnel_destruct(struct sock *sk)
+{
+	struct pppol2tp_tunnel *tunnel = pppol2tp_sock_to_tunnel(sk);
+
+	/* Nothing to undo if this socket carries no tunnel context */
+	if (tunnel == NULL)
+		return;
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: closing...\n", tunnel->name);
+
+	/* Close all sessions */
+	pppol2tp_tunnel_closeall(tunnel);
+
+	/* No longer an encapsulation socket. See net/ipv4/udp.c */
+	udp_sk(sk)->encap_type = 0;
+	udp_sk(sk)->encap_rcv = NULL;
+
+	/* Remove hooks into tunnel socket */
+	tunnel->sock = NULL;
+	sk->sk_destruct = tunnel->old_sk_destruct;
+	sk->sk_user_data = NULL;
+
+	/* Call original (UDP) socket destructor */
+	if (sk->sk_destruct)
+		sk->sk_destruct(sk);
+
+	pppol2tp_tunnel_dec_refcount(tunnel);
+}
+
+/* Session socket destructor: releases the session context attached to
+ * the socket. (Called from sock_put() if refcnt == 0.)
+ */
+static void pppol2tp_session_destruct(struct sock *sk)
+{
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+
+	/* No context was ever attached: nothing to release */
+	if (sk->sk_user_data == NULL)
+		return;
+
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		return;
+
+	/* Don't use pppol2tp_sock_to_tunnel() here to get the tunnel
+	 * context because the tunnel socket might have already been
+	 * closed (its sk->sk_user_data will be NULL) so use the
+	 * session's private tunnel ptr instead.
+	 */
+	tunnel = session->tunnel;
+	if (tunnel != NULL) {
+		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+
+		/* If session_id is zero, this is a null session
+		 * context, which was created for a socket that is
+		 * being used only to manage tunnels. Only real
+		 * sessions are in the hash and counted.
+		 */
+		if (session->tunnel_addr.s_session != 0) {
+			/* Delete the session socket from the hash */
+			write_lock(&tunnel->hlist_lock);
+			hlist_del_init(&session->hlist);
+			write_unlock(&tunnel->hlist_lock);
+
+			atomic_dec(&pppol2tp_session_count);
+		}
+
+		/* This will delete the tunnel context if this is the
+		 * last session on the tunnel.
+		 */
+		session->tunnel = NULL;
+		session->tunnel_sock = NULL;
+		pppol2tp_tunnel_dec_refcount(tunnel);
+	}
+
+	kfree(session);
+}
+
+/* release() handler: called when the PPPoX socket (session) is closed.
+ *
+ * Returns 0 on success (including when there is no sock attached), or
+ * -EBADF if the sock is already marked dead.
+ */
+static int pppol2tp_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk == NULL)
+		return 0;
+
+	lock_sock(sk);
+
+	if (sock_flag(sk, SOCK_DEAD) != 0) {
+		release_sock(sk);
+		return -EBADF;
+	}
+
+	pppox_unbind_sock(sk);
+
+	/* Signal the death of the socket. */
+	sk->sk_state = PPPOX_DEAD;
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	/* Purge any queued data */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+
+	release_sock(sk);
+
+	/* This will delete the session context via
+	 * pppol2tp_session_destruct() if the socket's refcnt drops to
+	 * zero.
+	 */
+	sock_put(sk);
+
+	return 0;
+}
+
+/* Internal function to prepare a tunnel (UDP) socket to have PPPoX
+ * sockets attached to it.
+ *
+ * @fd:        file descriptor of the tunnel UDP socket.
+ * @tunnel_id: id used to name the tunnel and tag its statistics.
+ * @error:     set to 0 when a new tunnel context is created and to a
+ *             negative errno on failure; left untouched when the socket
+ *             already carries a tunnel context.
+ *
+ * Returns the tunnel's sock, or NULL on failure.
+ */
+static struct sock *pppol2tp_prepare_tunnel_socket(int fd, u16 tunnel_id,
+						   int *error)
+{
+	struct pppol2tp_tunnel *tunnel;
+	struct socket *sock;
+	struct sock *sk;
+	struct sock *ret = NULL;
+	int err = -EBADF;
+
+	/* Get the tunnel UDP socket from the fd, which was opened by
+	 * the userspace L2TP daemon.
+	 */
+	sock = sockfd_lookup(fd, &err);
+	if (sock == NULL) {
+		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
+		       "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
+		       tunnel_id, fd, err);
+		*error = err;
+		return NULL;
+	}
+
+	/* Quick sanity checks: must be a datagram socket... */
+	if (sock->type != SOCK_DGRAM) {
+		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
+		       "tunl %hu: fd %d wrong type, got %d, expected %d\n",
+		       tunnel_id, fd, sock->type, SOCK_DGRAM);
+		err = -ESOCKTNOSUPPORT;
+		goto fail;
+	}
+
+	/* ...of the AF_INET family */
+	if (sock->ops->family != AF_INET) {
+		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
+		       "tunl %hu: fd %d wrong family, got %d, expected %d\n",
+		       tunnel_id, fd, sock->ops->family, AF_INET);
+		err = -EAFNOSUPPORT;
+		goto fail;
+	}
+
+	sk = sock->sk;
+
+	/* Check if this socket has already been prepped: if its
+	 * user-data field is set there is nothing left to do.
+	 */
+	tunnel = sk->sk_user_data;
+	if (tunnel != NULL) {
+		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+
+		ret = tunnel->sock;
+		goto put;
+	}
+
+	/* This socket is available and needs prepping. Create a new tunnel
+	 * context and init it.
+	 */
+	tunnel = kzalloc(sizeof(struct pppol2tp_tunnel), GFP_KERNEL);
+	sk->sk_user_data = tunnel;
+	if (tunnel == NULL) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	tunnel->magic = L2TP_TUNNEL_MAGIC;
+	sprintf(&tunnel->name[0], "tunl %hu", tunnel_id);
+
+	tunnel->stats.tunnel_id = tunnel_id;
+	tunnel->debug = PPPOL2TP_DEFAULT_DEBUG_FLAGS;
+
+	/* Hook on the tunnel socket destructor so that we can cleanup
+	 * if the tunnel socket goes away.
+	 */
+	tunnel->old_sk_destruct = sk->sk_destruct;
+	sk->sk_destruct = &pppol2tp_tunnel_destruct;
+
+	tunnel->sock = sk;
+	sk->sk_allocation = GFP_ATOMIC;
+
+	/* Misc init */
+	rwlock_init(&tunnel->hlist_lock);
+
+	/* Add tunnel to our list */
+	INIT_LIST_HEAD(&tunnel->list);
+	write_lock(&pppol2tp_tunnel_list_lock);
+	list_add(&tunnel->list, &pppol2tp_tunnel_list);
+	write_unlock(&pppol2tp_tunnel_list_lock);
+	atomic_inc(&pppol2tp_tunnel_count);
+
+	/* Bump the reference count. The tunnel context is deleted
+	 * only when this drops to zero.
+	 */
+	pppol2tp_tunnel_inc_refcount(tunnel);
+
+	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+	udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
+	udp_sk(sk)->encap_rcv = pppol2tp_udp_encap_recv;
+
+	ret = tunnel->sock;
+	*error = 0;
+	goto put;
+
+fail:
+	*error = err;
+put:
+	/* Drop the file reference taken by sockfd_lookup() */
+	sockfd_put(sock);
+
+	return ret;
+}
+
+/* Protocol descriptor passed to sk_alloc() in pppol2tp_create().
+ * Socks of this protocol are sized as a struct pppox_sock.
+ */
+static struct proto pppol2tp_sk_proto = {
+	.name	  = "PPPOL2TP",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct pppox_sock),
+};
+
+/* socket() handler. Allocates and initializes a new struct sock for a
+ * PPPoL2TP socket and attaches it to @sock.
+ *
+ * Returns 0 on success or -ENOMEM if the sock cannot be allocated.
+ */
+static int pppol2tp_create(struct socket *sock)
+{
+	struct sock *sk = sk_alloc(PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto, 1);
+
+	if (sk == NULL)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	/* Socket-level setup */
+	sock->state = SS_UNCONNECTED;
+	sock->ops = &pppol2tp_ops;
+
+	/* Sock-level setup */
+	sk->sk_backlog_rcv = pppol2tp_recv_core;
+	sk->sk_protocol = PX_PROTO_OL2TP;
+	sk->sk_family = PF_PPPOX;
+	sk->sk_state = PPPOX_NONE;
+	sk->sk_type = SOCK_STREAM;
+	sk->sk_destruct = pppol2tp_session_destruct;
+
+	return 0;
+}
+
+/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
+ *
+ * @sock:         PPPoX socket being connected.
+ * @uservaddr:    a struct sockaddr_pppol2tp describing tunnel and session.
+ * @sockaddr_len: length of @uservaddr (not examined here).
+ * @flags:        unused.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -EINVAL   wrong protocol in the address, or s_tunnel is 0
+ *   -EBUSY    socket is already connected
+ *   -EALREADY socket already has a context attached
+ *   -ENOENT   no tunnel with the given id
+ *   -EEXIST   the session already exists on the tunnel
+ *   -ENOMEM   no memory for the session context
+ *   -EBADF    tunnel socket has no tunnel context
+ * or whatever pppol2tp_prepare_tunnel_socket() or ppp_register_channel()
+ * report.
+ *
+ * Most failures happen before a session context exists, so the failure
+ * message must not rely on one being there. A session context that was
+ * allocated here but never attached to the socket is freed again before
+ * returning an error.
+ */
+static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
+			    int sockaddr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
+	struct pppox_sock *po = pppox_sk(sk);
+	struct sock *tunnel_sock = NULL;
+	struct pppol2tp_session *session = NULL;
+	struct pppol2tp_tunnel *tunnel;
+	struct dst_entry *dst;
+	int error = 0;
+
+	lock_sock(sk);
+
+	error = -EINVAL;
+	if (sp->sa_protocol != PX_PROTO_OL2TP)
+		goto end;
+
+	/* Check for already bound sockets */
+	error = -EBUSY;
+	if (sk->sk_state & PPPOX_CONNECTED)
+		goto end;
+
+	/* We don't support rebinding anyway */
+	error = -EALREADY;
+	if (sk->sk_user_data)
+		goto end; /* socket is already attached */
+
+	/* Don't bind if s_tunnel is 0 */
+	error = -EINVAL;
+	if (sp->pppol2tp.s_tunnel == 0)
+		goto end;
+
+	/* Special case: prepare tunnel socket if s_session and
+	 * d_session is 0. Otherwise look up tunnel using supplied
+	 * tunnel id.
+	 */
+	if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
+		tunnel_sock = pppol2tp_prepare_tunnel_socket(sp->pppol2tp.fd,
+							     sp->pppol2tp.s_tunnel,
+							     &error);
+		if (tunnel_sock == NULL)
+			goto end;
+
+		tunnel = tunnel_sock->sk_user_data;
+	} else {
+		tunnel = pppol2tp_tunnel_find(sp->pppol2tp.s_tunnel);
+
+		/* Error if we can't find the tunnel */
+		error = -ENOENT;
+		if (tunnel == NULL)
+			goto end;
+
+		tunnel_sock = tunnel->sock;
+	}
+
+	/* Check that this session doesn't already exist. If it does,
+	 * 'session' refers to that existing context, which is not ours
+	 * to free.
+	 */
+	error = -EEXIST;
+	session = pppol2tp_session_find(tunnel, sp->pppol2tp.s_session);
+	if (session != NULL)
+		goto end;
+
+	/* Allocate and initialize a new session context. */
+	session = kzalloc(sizeof(struct pppol2tp_session), GFP_KERNEL);
+	if (session == NULL) {
+		error = -ENOMEM;
+		goto end;
+	}
+
+	skb_queue_head_init(&session->reorder_q);
+
+	session->magic	     = L2TP_SESSION_MAGIC;
+	session->owner	     = current->pid;
+	session->sock	     = sk;
+	session->tunnel	     = tunnel;
+	session->tunnel_sock = tunnel_sock;
+	session->tunnel_addr = sp->pppol2tp;
+	sprintf(&session->name[0], "sess %hu/%hu",
+		session->tunnel_addr.s_tunnel,
+		session->tunnel_addr.s_session);
+
+	session->stats.tunnel_id  = session->tunnel_addr.s_tunnel;
+	session->stats.session_id = session->tunnel_addr.s_session;
+
+	INIT_HLIST_NODE(&session->hlist);
+
+	/* Inherit debug options from tunnel */
+	session->debug = tunnel->debug;
+
+	/* Default MTU must allow space for UDP/L2TP/PPP
+	 * headers.
+	 */
+	session->mtu = session->mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+
+	/* If PMTU discovery was enabled, use the MTU that was discovered */
+	dst = sk_dst_get(sk);
+	if (dst != NULL) {
+		u32 pmtu = dst_mtu(__sk_dst_get(sk));
+		if (pmtu != 0)
+			session->mtu = session->mru = pmtu -
+				PPPOL2TP_HEADER_OVERHEAD;
+		dst_release(dst);
+	}
+
+	/* Special case: if source & dest session_id == 0x0000, this socket is
+	 * being created to manage the tunnel. Don't add the session to the
+	 * session hash list, just set up the internal context for use by
+	 * ioctl() and sockopt() handlers.
+	 */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0)) {
+		error = 0;
+		sk->sk_user_data = session;
+		goto out_no_ppp;
+	}
+
+	/* Get tunnel context from the tunnel socket */
+	tunnel = pppol2tp_sock_to_tunnel(tunnel_sock);
+	if (tunnel == NULL) {
+		error = -EBADF;
+		goto err_free_session;
+	}
+
+	/* Right now, because we don't have a way to push the incoming skb's
+	 * straight through the UDP layer, the only header we need to worry
+	 * about is the L2TP header. This size is different depending on
+	 * whether sequence numbers are enabled for the data channel.
+	 */
+	po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+
+	po->chan.private = sk;
+	po->chan.ops	 = &pppol2tp_chan_ops;
+	po->chan.mtu	 = session->mtu;
+
+	error = ppp_register_channel(&po->chan);
+	if (error)
+		goto err_free_session;
+
+	/* This is how we get the session context from the socket. */
+	sk->sk_user_data = session;
+
+	/* Add session to the tunnel's hash list */
+	write_lock(&tunnel->hlist_lock);
+	hlist_add_head(&session->hlist,
+		       pppol2tp_session_id_hash(tunnel,
+						session->tunnel_addr.s_session));
+	write_unlock(&tunnel->hlist_lock);
+
+	atomic_inc(&pppol2tp_session_count);
+
+out_no_ppp:
+	pppol2tp_tunnel_inc_refcount(tunnel);
+	sk->sk_state = PPPOX_CONNECTED;
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: created\n", session->name);
+	goto end;
+
+err_free_session:
+	/* The session context was allocated above but has not been
+	 * attached to the socket or hashed, so nothing else will free
+	 * it. Report the failure while its name is still available.
+	 */
+	release_sock(sk);
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_WARNING,
+	       "%s: connect failed: %d\n", session->name, error);
+	kfree(session);
+
+	return error;
+
+end:
+	release_sock(sk);
+
+	/* 'session' is NULL for every failure that happens before the
+	 * session lookup, so fall back to a fixed name in that case.
+	 */
+	if (error != 0)
+		PRINTK(session ? session->debug : -1, PPPOL2TP_MSG_CONTROL, KERN_WARNING,
+		       "%s: connect failed: %d\n",
+		       session ? session->name : "pppol2tp", error);
+
+	return error;
+}
+
+/* getname() support.
+ */
+static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
+			    int *usockaddr_len, int peer)
+{
+	int len = sizeof(struct sockaddr_pppol2tp);
+	struct sockaddr_pppol2tp sp;
+	int error = 0;
+	struct pppol2tp_session *session;
+
+	error = -ENOTCONN;
+	if (sock->sk->sk_state != PPPOX_CONNECTED)
+		goto end;
+
+	session = pppol2tp_sock_to_session(sock->sk);
+	if (session == NULL) {
+		error = -EBADF;
+		goto end;
+	}
+
+	sp.sa_family	= AF_PPPOX;
+	sp.sa_protocol	= PX_PROTO_OL2TP;
+	memcpy(&sp.pppol2tp, &session->tunnel_addr,
+	       sizeof(struct pppol2tp_addr));
+
+	memcpy(uaddr, &sp, len);
+
+	*usockaddr_len = len;
+
+	error = 0;
+
+end:
+	return error;
+}
+
+/****************************************************************************
+ * ioctl() handlers.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. However, in order to control kernel tunnel features, we allow
+ * userspace to create a special "tunnel" PPPoX socket which is used for
+ * control only.  Tunnel PPPoX sockets have session_id == 0 and simply allow
+ * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
+ * calls.
+ ****************************************************************************/
+
+/* Session ioctl helper. Returns 0 on success, -ENXIO if the command needs a
+ * connected socket, -EFAULT on a bad user pointer, -ENOSYS for unknown cmd. */
+static int pppol2tp_session_ioctl(struct pppol2tp_session *session,
+				  unsigned int cmd, unsigned long arg)
+{
+	struct ifreq ifr;
+	int err = 0;
+	struct sock *sk = session->sock;
+	int val = (int) arg;
+
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
+	       "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
+	       session->name, cmd, arg);
+
+	sock_hold(sk);
+
+	switch (cmd) {
+	case SIOCGIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+		ifr.ifr_mtu = session->mtu;
+		if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case SIOCSIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+
+		session->mtu = ifr.ifr_mtu;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case PPPIOCGMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (put_user(session->mru, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCSMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (get_user(val,(int __user *) arg))
+			break;
+
+		session->mru = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCGFLAGS:
+		err = -EFAULT;
+		if (put_user(session->flags, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get flags=%d\n", session->name, session->flags);
+		err = 0;
+		break;
+
+	case PPPIOCSFLAGS:
+		err = -EFAULT;
+		if (get_user(val, (int __user *) arg))
+			break;
+		session->flags = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set flags=%d\n", session->name, session->flags);
+		err = 0;
+		break;
+
+	case PPPIOCGL2TPSTATS:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+		err = -EFAULT;
+		if (copy_to_user((void __user *) arg, &session->stats,
+				 sizeof(session->stats)))
+			break;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get L2TP stats\n", session->name);
+		err = 0;
+		break;
+
+	default:
+		err = -ENOSYS;
+		break;
+	}
+
+	sock_put(sk);
+
+	return err;
+}
+
+/* Tunnel ioctl helper.
+ *
+ * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data
+ * specifies a session_id, the session ioctl handler is called. This allows an
+ * application to retrieve session stats via a tunnel socket.
+ */
+static int pppol2tp_tunnel_ioctl(struct pppol2tp_tunnel *tunnel,
+				 unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	struct sock *sk = tunnel->sock;
+	struct pppol2tp_ioc_stats stats_req;
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
+	       "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", tunnel->name,
+	       cmd, arg);
+
+	sock_hold(sk);
+
+	switch (cmd) {
+	case PPPIOCGL2TPSTATS:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		if (copy_from_user(&stats_req, (void __user *) arg,
+				   sizeof(stats_req))) {
+			err = -EFAULT;
+			break;
+		}
+		if (stats_req.session_id != 0) {
+			/* resend to session ioctl handler */
+			struct pppol2tp_session *session =
+				pppol2tp_session_find(tunnel, stats_req.session_id);
+			if (session != NULL)
+				err = pppol2tp_session_ioctl(session, cmd, arg);
+			else
+				err = -EBADR;
+			break;
+		}
+#ifdef CONFIG_XFRM
+		tunnel->stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
+#endif
+		if (copy_to_user((void __user *) arg, &tunnel->stats,
+				 sizeof(tunnel->stats))) {
+			err = -EFAULT;
+			break;
+		}
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get L2TP stats\n", tunnel->name);
+		err = 0;
+		break;
+
+	default:
+		err = -ENOSYS;
+		break;
+	}
+
+	sock_put(sk);
+
+	return err;
+}
+
+/* Main ioctl() handler.
+ * Dispatch to tunnel or session helpers depending on the socket.
+ */
+static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	int err;
+
+	if (!sk)
+		return 0;
+
+	err = -EBADF;
+	if (sock_flag(sk, SOCK_DEAD) != 0)
+		goto end;
+
+	err = -ENOTCONN;
+	if ((sk->sk_user_data == NULL) ||
+	    (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))))
+		goto end;
+
+	/* Get session context from the socket */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session's session_id is zero, treat ioctl as a
+	 * tunnel ioctl
+	 */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0)) {
+		err = -EBADF;
+		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+		if (tunnel == NULL)
+			goto end;
+
+		err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
+		goto end;
+	}
+
+	err = pppol2tp_session_ioctl(session, cmd, arg);
+
+end:
+	return err;
+}
+
+/*****************************************************************************
+ * setsockopt() / getsockopt() support.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. In order to control kernel tunnel features, we allow userspace to
+ * create a special "tunnel" PPPoX socket which is used for control only.
+ * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
+ * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
+ *****************************************************************************/
+
+/* Tunnel setsockopt() helper.
+ */
+static int pppol2tp_tunnel_setsockopt(struct sock *sk,
+				      struct pppol2tp_tunnel *tunnel,
+				      int optname, int val)
+{
+	int err = 0;
+
+	switch (optname) {
+	case PPPOL2TP_SO_DEBUG:
+		tunnel->debug = val;
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set debug=%x\n", tunnel->name, tunnel->debug);
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	return err;
+}
+
+/* Session setsockopt helper.
+ */
+static int pppol2tp_session_setsockopt(struct sock *sk,
+				       struct pppol2tp_session *session,
+				       int optname, int val)
+{
+	int err = 0;
+
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		if ((val != 0) && (val != 1)) {
+			err = -EINVAL;
+			break;
+		}
+		session->recv_seq = val ? -1 : 0;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set recv_seq=%d\n", session->name,
+		       session->recv_seq);
+		break;
+
+	case PPPOL2TP_SO_SENDSEQ:
+		if ((val != 0) && (val != 1)) {
+			err = -EINVAL;
+			break;
+		}
+		session->send_seq = val ? -1 : 0;
+		{
+			struct sock *ssk      = session->sock;
+			struct pppox_sock *po = pppox_sk(ssk);
+			po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
+				PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+		}
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set send_seq=%d\n", session->name, session->send_seq);
+		break;
+
+	case PPPOL2TP_SO_LNSMODE:
+		if ((val != 0) && (val != 1)) {
+			err = -EINVAL;
+			break;
+		}
+		session->lns_mode = val ? -1 : 0;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set lns_mode=%d\n", session->name,
+		       session->lns_mode);
+		break;
+
+	case PPPOL2TP_SO_DEBUG:
+		session->debug = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set debug=%x\n", session->name, session->debug);
+		break;
+
+	case PPPOL2TP_SO_REORDERTO:
+		session->reorder_timeout = msecs_to_jiffies(val);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set reorder_timeout=%d\n", session->name,
+		       session->reorder_timeout);
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	return err;
+}
+
+/* Main setsockopt() entry point.
+ * Does API checks, then calls either the tunnel or session setsockopt
+ * handler, according to whether the PPPoL2TP socket is for a regular
+ * session or the special tunnel type.
+ * Returns 0 on success or a negative errno; the handler's result is
+ * returned as-is so invalid options and values are reported to the caller.
+ */
+static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session = sk->sk_user_data;
+	struct pppol2tp_tunnel *tunnel;
+	int val;
+	int err;
+
+	if (level != SOL_PPPOL2TP)
+		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	err = -ENOTCONN;
+	if (sk->sk_user_data == NULL)
+		goto end;
+
+	/* Get session context from the socket */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel
+	 */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0)) {
+		err = -EBADF;
+		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+		if (tunnel == NULL)
+			goto end;
+
+		err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
+	} else
+		err = pppol2tp_session_setsockopt(sk, session, optname, val);
+
+end:
+	return err;
+}
+
+/* Tunnel getsockopt helper. Stores the option value in the kernel int *val.
+ */
+static int pppol2tp_tunnel_getsockopt(struct sock *sk,
+				      struct pppol2tp_tunnel *tunnel,
+				      int optname, int *val)
+{
+	int err = 0;
+
+	switch (optname) {
+	case PPPOL2TP_SO_DEBUG:
+		*val = tunnel->debug;
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get debug=%x\n", tunnel->name, tunnel->debug);
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	return err;
+}
+
+/* Session getsockopt helper. Stores the option value in the kernel int *val.
+ */
+static int pppol2tp_session_getsockopt(struct sock *sk,
+				       struct pppol2tp_session *session,
+				       int optname, int *val)
+{
+	int err = 0;
+
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		*val = session->recv_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get recv_seq=%d\n", session->name, *val);
+		break;
+
+	case PPPOL2TP_SO_SENDSEQ:
+		*val = session->send_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get send_seq=%d\n", session->name, *val);
+		break;
+
+	case PPPOL2TP_SO_LNSMODE:
+		*val = session->lns_mode;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get lns_mode=%d\n", session->name, *val);
+		break;
+
+	case PPPOL2TP_SO_DEBUG:
+		*val = session->debug;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get debug=%d\n", session->name, *val);
+		break;
+
+	case PPPOL2TP_SO_REORDERTO:
+		*val = (int) jiffies_to_msecs(session->reorder_timeout);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get reorder_timeout=%d\n", session->name, *val);
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+	}
+
+	return err;
+}
+
+/* Main getsockopt() entry point. Rejects a negative optlen before clamping
+ * it, then calls the tunnel or session getsockopt handler according to
+ * whether the PPPoX socket is a regular session or the special tunnel type.
+ * If the handler fails, its error is returned and nothing is copied out.
+ */
+static int pppol2tp_getsockopt(struct socket *sock, int level,
+			       int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session = sk->sk_user_data;
+	struct pppol2tp_tunnel *tunnel;
+	int val, len, err;
+
+	if (level != SOL_PPPOL2TP)
+		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+
+	if (get_user(len, (int __user *) optlen))
+		return -EFAULT;
+
+	if (len < 0)
+		return -EINVAL;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	err = -ENOTCONN;
+	if (sk->sk_user_data == NULL)
+		goto end;
+
+	/* Get the session context */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0)) {
+		err = -EBADF;
+		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+		if (tunnel == NULL)
+			goto end;
+
+		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
+	} else
+		err = pppol2tp_session_getsockopt(sk, session, optname, &val);
+
+	if (err)
+		goto end;
+
+	err = -EFAULT;
+	if (put_user(len, (int __user *) optlen) ||
+	    copy_to_user((void __user *) optval, &val, len))
+		goto end;
+
+	err = 0;
+end:
+	return err;
+}
+
+/*****************************************************************************
+ * /proc filesystem for debug
+ *****************************************************************************/
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+
+struct pppol2tp_seq_data {
+	struct pppol2tp_tunnel *tunnel; /* current tunnel */
+	struct pppol2tp_session *session; /* NULL means get first session in tunnel */
+};
+
+static struct pppol2tp_session *next_session(struct pppol2tp_tunnel *tunnel, struct pppol2tp_session *curr)
+{
+	struct pppol2tp_session *session = NULL;
+	struct hlist_node *walk;
+	int found = 0;
+	int next = 0;
+	int i;
+
+	read_lock(&tunnel->hlist_lock);
+	for (i = 0; i < PPPOL2TP_HASH_SIZE; i++) {
+		hlist_for_each_entry(session, walk, &tunnel->session_hlist[i], hlist) {
+			if (curr == NULL) {
+				found = 1;
+				goto out;
+			}
+			if (session == curr) {
+				next = 1;
+				continue;
+			}
+			if (next) {
+				found = 1;
+				goto out;
+			}
+		}
+	}
+out:
+	read_unlock(&tunnel->hlist_lock);
+	if (!found)
+		session = NULL;
+
+	return session;
+}
+
+static struct pppol2tp_tunnel *next_tunnel(struct pppol2tp_tunnel *curr)
+{
+	struct pppol2tp_tunnel *tunnel = NULL;
+
+	read_lock(&pppol2tp_tunnel_list_lock);
+	if (list_is_last(&curr->list, &pppol2tp_tunnel_list)) {
+		goto out;
+	}
+	tunnel = list_entry(curr->list.next, struct pppol2tp_tunnel, list);
+out:
+	read_unlock(&pppol2tp_tunnel_list_lock);
+
+	return tunnel;
+}
+
+static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
+{
+	struct pppol2tp_seq_data *pd = SEQ_START_TOKEN;
+	loff_t pos = *offs;
+
+	if (!pos)
+		goto out;
+
+	BUG_ON(m->private == NULL);
+	pd = m->private;
+
+	if (pd->tunnel == NULL) {
+		if (!list_empty(&pppol2tp_tunnel_list))
+			pd->tunnel = list_entry(pppol2tp_tunnel_list.next, struct pppol2tp_tunnel, list);
+	} else {
+		pd->session = next_session(pd->tunnel, pd->session);
+		if (pd->session == NULL) {
+			pd->tunnel = next_tunnel(pd->tunnel);
+		}
+	}
+
+	/* NULL tunnel and session indicates end of list */
+	if ((pd->tunnel == NULL) && (pd->session == NULL))
+		pd = NULL;
+
+out:
+	return pd;
+}
+
+static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return NULL;
+}
+
+static void pppol2tp_seq_stop(struct seq_file *p, void *v)
+{
+	/* nothing to do */
+}
+
+static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
+{
+	struct pppol2tp_tunnel *tunnel = v;
+
+	seq_printf(m, "\nTUNNEL '%s', %c %d\n",
+		   tunnel->name,
+		   (tunnel == tunnel->sock->sk_user_data) ? 'Y':'N',
+		   atomic_read(&tunnel->ref_count) - 1);
+	seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
+		   tunnel->debug,
+		   tunnel->stats.tx_packets, tunnel->stats.tx_bytes,
+		   tunnel->stats.tx_errors,
+		   tunnel->stats.rx_packets, tunnel->stats.rx_bytes,
+		   tunnel->stats.rx_errors);
+}
+
+static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
+{
+	struct pppol2tp_session *session = v;
+
+	seq_printf(m, "  SESSION '%s' %08X/%d %04X/%04X -> "
+		   "%04X/%04X %d %c\n",
+		   session->name,
+		   ntohl(session->tunnel_addr.addr.sin_addr.s_addr),
+		   ntohs(session->tunnel_addr.addr.sin_port),
+		   session->tunnel_addr.s_tunnel,
+		   session->tunnel_addr.s_session,
+		   session->tunnel_addr.d_tunnel,
+		   session->tunnel_addr.d_session,
+		   session->sock->sk_state,
+		   (session == session->sock->sk_user_data) ?
+		   'Y' : 'N');
+	seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
+		   session->mtu, session->mru,
+		   session->recv_seq ? 'R' : '-',
+		   session->send_seq ? 'S' : '-',
+		   session->lns_mode ? "LNS" : "LAC",
+		   session->debug,
+		   jiffies_to_msecs(session->reorder_timeout));
+	seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
+		   session->nr, session->ns,
+		   session->stats.tx_packets,
+		   session->stats.tx_bytes,
+		   session->stats.tx_errors,
+		   session->stats.rx_packets,
+		   session->stats.rx_bytes,
+		   session->stats.rx_errors);
+}
+
+static int pppol2tp_seq_show(struct seq_file *m, void *v)
+{
+	struct pppol2tp_seq_data *pd = v;
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
+		seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
+		seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		seq_puts(m, "  SESSION name, addr/port src-tid/sid "
+			 "dest-tid/sid state user-data-ok\n");
+		seq_puts(m, "   mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
+		seq_puts(m, "   nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		goto out;
+	}
+
+	/* Show the tunnel or session context.
+	 */
+	if (pd->session == NULL)
+		pppol2tp_seq_tunnel_show(m, pd->tunnel);
+	else
+		pppol2tp_seq_session_show(m, pd->session);
+
+out:
+	return 0;
+}
+
+static struct seq_operations pppol2tp_seq_ops = {
+	.start		= pppol2tp_seq_start,
+	.next		= pppol2tp_seq_next,
+	.stop		= pppol2tp_seq_stop,
+	.show		= pppol2tp_seq_show,
+};
+
+/* Called when our /proc file is opened. We allocate data for use when
+ * iterating our tunnel / session contexts and store it in the private
+ * data of the seq_file.
+ */
+static int pppol2tp_proc_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret = 0;
+
+	ret = seq_open(file, &pppol2tp_seq_ops);
+	if (ret < 0)
+		goto out;
+
+	m = file->private_data;
+
+	/* Allocate and fill our proc_data for access later */
+	ret = -ENOMEM;
+	m->private = kzalloc(sizeof(struct pppol2tp_seq_data), GFP_KERNEL);
+	if (m->private == NULL) {
+		/* Undo seq_open() so the seq_file is not leaked on failure */
+		seq_release(inode, file);
+		goto out;
+	}
+	ret = 0;
+
+out:
+	return ret;
+}
+
+/* Called when /proc file access completes.
+ */
+static int pppol2tp_proc_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = (struct seq_file *)file->private_data;
+
+	kfree(m->private);
+	m->private = NULL;
+
+	return seq_release(inode, file);
+}
+
+static struct file_operations pppol2tp_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pppol2tp_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= pppol2tp_proc_release,
+};
+
+static struct proc_dir_entry *pppol2tp_proc;
+
+#endif /* CONFIG_PROC_FS */
+
+/*****************************************************************************
+ * Init and cleanup
+ *****************************************************************************/
+
+static struct proto_ops pppol2tp_ops = {
+	.family		= AF_PPPOX,
+	.owner		= THIS_MODULE,
+	.release	= pppol2tp_release,
+	.bind		= sock_no_bind,
+	.connect	= pppol2tp_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= pppol2tp_getname,
+	.poll		= datagram_poll,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= pppol2tp_setsockopt,
+	.getsockopt	= pppol2tp_getsockopt,
+	.sendmsg	= pppol2tp_sendmsg,
+	.recvmsg	= pppol2tp_recvmsg,
+	.mmap		= sock_no_mmap,
+	.ioctl		= pppox_ioctl,
+};
+
+static struct pppox_proto pppol2tp_proto = {
+	.create		= pppol2tp_create,
+	.ioctl		= pppol2tp_ioctl
+};
+
+static int __init pppol2tp_init(void)
+{
+	int err;
+
+	err = proto_register(&pppol2tp_sk_proto, 0);
+	if (err)
+		goto out;
+	err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
+	if (err)
+		goto out_unregister_pppol2tp_proto;
+
+#ifdef CONFIG_PROC_FS
+	pppol2tp_proc = create_proc_entry("pppol2tp", 0, proc_net);
+	if (!pppol2tp_proc) {
+		err = -ENOMEM;
+		goto out_unregister_pppox_proto;
+	}
+	pppol2tp_proc->proc_fops = &pppol2tp_proc_fops;
+#endif /* CONFIG_PROC_FS */
+	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
+	       PPPOL2TP_DRV_VERSION);
+
+out:
+	return err;
+
+out_unregister_pppox_proto:
+	unregister_pppox_proto(PX_PROTO_OL2TP);
+out_unregister_pppol2tp_proto:
+	proto_unregister(&pppol2tp_sk_proto);
+	goto out;
+}
+
+static void __exit pppol2tp_exit(void)
+{
+	unregister_pppox_proto(PX_PROTO_OL2TP);
+
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("pppol2tp", proc_net);
+#endif
+	proto_unregister(&pppol2tp_sk_proto);
+}
+
+module_init(pppol2tp_init);
+module_exit(pppol2tp_exit);
+
+MODULE_AUTHOR("Martijn van Oosterhout <kleptog@svana.org>,"
+	      "James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("PPP over L2TP over UDP");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(PPPOL2TP_DRV_VERSION);
-- 
cgit v0.10.2


From 38d15b656258b52a659fcf3e181f85b51bd1851f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 27 Jun 2007 15:52:25 -0700
Subject: [PPPOL2TP]: Use proper printf format specifier for size_t.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index 8a6bff5..5891a0f 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -890,11 +890,11 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	/* Debug */
 	if (session->send_seq)
 		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %d bytes, ns=%hu\n", session->name,
+		       "%s: send %Zd bytes, ns=%hu\n", session->name,
 		       total_len, session->ns - 1);
 	else
 		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %d bytes\n", session->name, total_len);
+		       "%s: send %Zd bytes\n", session->name, total_len);
 
 	if (session->debug & PPPOL2TP_MSG_DATA) {
 		int i;
-- 
cgit v0.10.2


From a6d2370b0839c228ae4e680e75263ecf0a73e251 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Wed, 27 Jun 2007 15:53:17 -0700
Subject: [L2TP]: Add PPPoL2TP maintainer

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/MAINTAINERS b/MAINTAINERS
index 151f4ef..fcfe598 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2903,6 +2903,11 @@ P:	Michal Ostrowski
 M:	mostrows@speakeasy.net
 S:	Maintained
 
+PPP OVER L2TP
+P:	James Chapman
+M:	jchapman@katalix.com
+S:	Maintained
+
 PREEMPTIBLE KERNEL
 P:	Robert Love
 M:	rml@tech9.net
-- 
cgit v0.10.2


From 58e50a904ec78caf4ca938801c031413b0d3f962 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Wed, 27 Jun 2007 15:53:49 -0700
Subject: [L2TP]: Add PPPoL2TP in-kernel documentation

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt
new file mode 100644
index 0000000..2451f55
--- /dev/null
+++ b/Documentation/networking/l2tp.txt
@@ -0,0 +1,169 @@
+This brief document describes how to use the kernel's PPPoL2TP driver
+to provide L2TP functionality. L2TP is a protocol that tunnels one or
+more PPP sessions over a UDP tunnel. It is commonly used for VPNs
+(L2TP/IPSec) and by ISPs to tunnel subscriber PPP sessions over an IP
+network infrastructure.
+
+Design
+======
+
+The PPPoL2TP driver, drivers/net/pppol2tp.c, provides a mechanism by
+which PPP frames carried through an L2TP session are passed through
+the kernel's PPP subsystem. The standard PPP daemon, pppd, handles all
+PPP interaction with the peer. PPP network interfaces are created for
+each local PPP endpoint.
+
+The L2TP protocol http://www.faqs.org/rfcs/rfc2661.html defines L2TP
+control and data frames. L2TP control frames carry messages between
+L2TP clients/servers and are used to setup / teardown tunnels and
+sessions. An L2TP client or server is implemented in userspace and
+will use a regular UDP socket per tunnel. L2TP data frames carry PPP
+frames, which may be PPP control or PPP data. The kernel's PPP
+subsystem arranges for PPP control frames to be delivered to pppd,
+while data frames are forwarded as usual.
+
+Each tunnel and session within a tunnel is assigned a unique tunnel_id
+and session_id. These ids are carried in the L2TP header of every
+control and data packet. The pppol2tp driver uses them to look up
+internal tunnel and/or session contexts. Zero tunnel / session ids are
+treated specially - zero ids are never assigned to tunnels or sessions
+in the network. In the driver, the tunnel context keeps a pointer to
+the tunnel UDP socket. The session context keeps a pointer to the
+PPPoL2TP socket, as well as other data that lets the driver interface
+to the kernel PPP subsystem.
+
+Note that the pppol2tp kernel driver handles only L2TP data frames;
+L2TP control frames are simply passed up to userspace in the UDP
+tunnel socket. The kernel handles all datapath aspects of the
+protocol, including data packet resequencing (if enabled).
+
+There are a number of requirements on the userspace L2TP daemon in
+order to use the pppol2tp driver.
+
+1. Use a UDP socket per tunnel.
+
+2. Create a single PPPoL2TP socket per tunnel bound to a special null
+   session id. This is used only for communicating with the driver but
+   must remain open while the tunnel is active. Opening this tunnel
+   management socket causes the driver to mark the tunnel socket as an
+   L2TP UDP encapsulation socket and flags it for use by the
+   referenced tunnel id. This hooks up the UDP receive path via
+   udp_encap_rcv() in net/ipv4/udp.c. PPP data frames are never passed
+   in this special PPPoX socket.
+
+3. Create a PPPoL2TP socket per L2TP session. This is typically done
+   by starting pppd with the pppol2tp plugin and appropriate
+   arguments. A PPPoL2TP tunnel management socket (Step 2) must be
+   created before the first PPPoL2TP session socket is created.
+
+When creating PPPoL2TP sockets, the application provides information
+to the driver about the socket in a socket connect() call. Source and
+destination tunnel and session ids are provided, as well as the file
+descriptor of a UDP socket. See struct pppol2tp_addr in
+include/linux/if_ppp.h. Note that zero tunnel / session ids are
+treated specially. When creating the per-tunnel PPPoL2TP management
+socket in Step 2 above, zero source and destination session ids are
+specified, which tells the driver to prepare the supplied UDP file
+descriptor for use as an L2TP tunnel socket.
+
+Userspace may control behavior of the tunnel or session using
+setsockopt and ioctl on the PPPoX socket. The following socket
+options are supported:-
+
+DEBUG     - bitmask of debug message categories. See below.
+SENDSEQ   - 0 => don't send packets with sequence numbers
+            1 => send packets with sequence numbers
+RECVSEQ   - 0 => receive packet sequence numbers are optional
+            1 => drop receive packets without sequence numbers
+LNSMODE   - 0 => act as LAC.
+            1 => act as LNS.
+REORDERTO - reorder timeout (in millisecs). If 0, don't try to reorder.
+
+Only the DEBUG option is supported by the special tunnel management
+PPPoX socket.
+
+In addition to the standard PPP ioctls, a PPPIOCGL2TPSTATS is provided
+to retrieve tunnel and session statistics from the kernel using the
+PPPoX socket of the appropriate tunnel or session.
+
+Debugging
+=========
+
+The driver supports a flexible debug scheme where kernel trace
+messages may be optionally enabled per tunnel and per session. Care is
+needed when debugging a live system since the messages are not
+rate-limited and a busy system could be swamped. Userspace uses
+setsockopt on the PPPoX socket to set a debug mask.
+
+The following debug mask bits are available:
+
+PPPOL2TP_MSG_DEBUG    verbose debug (if compiled in)
+PPPOL2TP_MSG_CONTROL  userspace - kernel interface
+PPPOL2TP_MSG_SEQ      sequence numbers handling
+PPPOL2TP_MSG_DATA     data packets
+
+Sample Userspace Code
+=====================
+
+1. Create tunnel management PPPoX socket
+
+        kernel_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+        if (kernel_fd >= 0) {
+                struct sockaddr_pppol2tp sax;
+                struct sockaddr_in const *peer_addr;
+
+                peer_addr = l2tp_tunnel_get_peer_addr(tunnel);
+                memset(&sax, 0, sizeof(sax));
+                sax.sa_family = AF_PPPOX;
+                sax.sa_protocol = PX_PROTO_OL2TP;
+                sax.pppol2tp.fd = udp_fd;       /* fd of tunnel UDP socket */
+                sax.pppol2tp.addr.sin_addr.s_addr = peer_addr->sin_addr.s_addr;
+                sax.pppol2tp.addr.sin_port = peer_addr->sin_port;
+                sax.pppol2tp.addr.sin_family = AF_INET;
+                sax.pppol2tp.s_tunnel = tunnel_id;
+                sax.pppol2tp.s_session = 0;     /* special case: mgmt socket */
+                sax.pppol2tp.d_tunnel = 0;
+                sax.pppol2tp.d_session = 0;     /* special case: mgmt socket */
+
+                if(connect(kernel_fd, (struct sockaddr *)&sax, sizeof(sax) ) < 0 ) {
+                        perror("connect failed");
+                        result = -errno;
+                        goto err;
+                }
+        }
+
+2. Create session PPPoX data socket
+
+        struct sockaddr_pppol2tp sax;
+        int fd;
+
+        /* Note, the target socket must be bound already, else it will not be ready */
+        sax.sa_family = AF_PPPOX;
+        sax.sa_protocol = PX_PROTO_OL2TP;
+        sax.pppol2tp.fd = tunnel_fd;
+        sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
+        sax.pppol2tp.addr.sin_port = addr->sin_port;
+        sax.pppol2tp.addr.sin_family = AF_INET;
+        sax.pppol2tp.s_tunnel  = tunnel_id;
+        sax.pppol2tp.s_session = session_id;
+        sax.pppol2tp.d_tunnel  = peer_tunnel_id;
+        sax.pppol2tp.d_session = peer_session_id;
+
+        /* session_fd is the fd of the session's PPPoL2TP socket.
+         * tunnel_fd is the fd of the tunnel UDP socket.
+         */
+        fd = connect(session_fd, (struct sockaddr *)&sax, sizeof(sax));
+        if (fd < 0 )    {
+                return -errno;
+        }
+        return 0;
+
+Miscellaneous
+=============
+
+The PPPoL2TP driver was developed as part of the OpenL2TP project by
+Katalix Systems Ltd. OpenL2TP is a full-featured L2TP client / server,
+designed from the ground up to have the L2TP datapath in the
+kernel. The project also implemented the pppol2tp plugin for pppd
+which allows pppd to use the kernel driver. Details can be found at
+http://openl2tp.sourceforge.net.
-- 
cgit v0.10.2


From a298830cd026b4c0cde45ef3679a5f68a17577e6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 28 Jun 2007 13:44:37 -0700
Subject: [NET]: Fix TX checksum feature check

This patch fixes a boolean error in the new TX checksum check
that causes bogus TSO packets to be generated.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/dev.c b/net/core/dev.c
index 36e9bf8..6dce9d2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1509,11 +1509,11 @@ int dev_queue_xmit(struct sk_buff *skb)
 		skb_set_transport_header(skb, skb->csum_start -
 					      skb_headroom(skb));
 
-		if (!(dev->features & NETIF_F_GEN_CSUM)
-		    || ((dev->features & NETIF_F_IP_CSUM)
-			&& skb->protocol == htons(ETH_P_IP))
-		    || ((dev->features & NETIF_F_IPV6_CSUM)
-			&& skb->protocol == htons(ETH_P_IPV6)))
+		if (!(dev->features & NETIF_F_GEN_CSUM) &&
+		    !((dev->features & NETIF_F_IP_CSUM) &&
+		      skb->protocol == htons(ETH_P_IP)) &&
+		    !((dev->features & NETIF_F_IPV6_CSUM) &&
+		      skb->protocol == htons(ETH_P_IPV6)))
 			if (skb_checksum_help(skb))
 				goto out_kfree_skb;
 	}
-- 
cgit v0.10.2


From a093bf006e09a305e95ff0938c0a18b7520aef67 Mon Sep 17 00:00:00 2001
From: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Date: Thu, 28 Jun 2007 20:45:47 -0700
Subject: [NET]: [DOC] Multiqueue hardware support documentation

Add a brief howto to Documentation/networking for multiqueue.  It
explains how to use the multiqueue API in a driver to support
multiqueue paths from the stack, as well as the qdiscs to use for
feeding a multiqueue device.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
new file mode 100644
index 0000000..00b60cc
--- /dev/null
+++ b/Documentation/networking/multiqueue.txt
@@ -0,0 +1,111 @@
+
+		HOWTO for multiqueue network device support
+		===========================================
+
+Section 1: Base driver requirements for implementing multiqueue support
+Section 2: Qdisc support for multiqueue devices
+Section 3: Brief howto using PRIO and RR for multiqueue devices
+
+
+Intro: Kernel support for multiqueue devices
+---------------------------------------------------------
+
+Kernel support for multiqueue devices is only an API that is presented to the
+netdevice layer for base drivers to implement.  This feature is part of the
+core networking stack, and all network devices will be running on the
+multiqueue-aware stack.  If a base driver only has one queue, then these
+changes are transparent to that driver.
+
+
+Section 1: Base driver requirements for implementing multiqueue support
+-----------------------------------------------------------------------
+
+Base drivers are required to use the new alloc_etherdev_mq() or
+alloc_netdev_mq() functions to allocate the subqueues for the device.  The
+underlying kernel API will take care of the allocation and deallocation of
+the subqueue memory, as well as netdev configuration of where the queues
+exist in memory.
+
+The base driver will also need to manage the queues as it does the global
+netdev->queue_lock today.  Therefore base drivers should use the
+netif_{start|stop|wake}_subqueue() functions to manage each queue while the
+device is still operational.  netdev->queue_lock is still used when the device
+comes online or when it's completely shut down (unregister_netdev(), etc.).
+
+Finally, the base driver should indicate that it is a multiqueue device.  The
+feature flag NETIF_F_MULTI_QUEUE should be added to the netdev->features
+bitmap on device initialization.  Below is an example from e1000:
+
+#ifdef CONFIG_E1000_MQ
+	if ( (adapter->hw.mac.type == e1000_82571) ||
+	     (adapter->hw.mac.type == e1000_82572) ||
+	     (adapter->hw.mac.type == e1000_80003es2lan))
+		netdev->features |= NETIF_F_MULTI_QUEUE;
+#endif
+
+
+Section 2: Qdisc support for multiqueue devices
+-----------------------------------------------
+
+Currently two qdiscs support multiqueue devices: a new round-robin qdisc,
+sch_rr, and sch_prio.  The qdisc is responsible for classifying the skb's to
+bands and queues, and will store the queue mapping into skb->queue_mapping.
+Use this field in the base driver to determine which queue to send the skb
+to.
+
+sch_rr has been added for hardware that doesn't want scheduling policies from
+software, so it's a straight round-robin qdisc.  It uses the same syntax and
+classification priomap that sch_prio uses, so it should be intuitive to
+configure for people who've used sch_prio.
+
+The PRIO qdisc naturally plugs into a multiqueue device.  If PRIO has been
+built with NET_SCH_PRIO_MQ, then upon load, it will make sure the number of
+bands requested is equal to the number of queues on the hardware.  If they
+are equal, it sets a one-to-one mapping up between the queues and bands.  If
+they're not equal, it will not load the qdisc.  This is the same behavior
+for RR.  Once the association is made, any skb that is classified will have
+skb->queue_mapping set, which will allow the driver to properly queue skb's
+to multiple queues.
+
+
+Section 3: Brief howto using PRIO and RR for multiqueue devices
+---------------------------------------------------------------
+
+The userspace command 'tc,' part of the iproute2 package, is used to configure
+qdiscs.  To add the PRIO qdisc to your network device, assuming the device is
+called eth0, run the following command:
+
+# tc qdisc add dev eth0 root handle 1: prio bands 4 multiqueue
+
+This will create 4 bands, 0 being highest priority, and associate those bands
+to the queues on your NIC.  Assuming eth0 has 4 Tx queues, the band mapping
+would look like:
+
+band 0 => queue 0
+band 1 => queue 1
+band 2 => queue 2
+band 3 => queue 3
+
+Traffic will begin flowing through each queue if your TOS values are assigning
+traffic across the various bands.  For example, ssh traffic will always try to
+go out band 0 based on TOS -> Linux priority conversion (realtime traffic),
+so it will be sent out queue 0.  ICMP traffic (pings) falls into the "normal"
+traffic classification, which is band 1.  Therefore pings will be sent out
+queue 1 on the NIC.
+
+Note the use of the multiqueue keyword.  This is only in versions of iproute2
+that support multiqueue networking devices; if this is omitted when loading
+a qdisc onto a multiqueue device, the qdisc will load and operate the same
+as if it were loaded onto a single-queue device (i.e. - sends all traffic to
+queue 0).
+
+Another alternative to multiqueue band allocation can be done by using the
+multiqueue option and specifying 0 bands.  If this is the case, the qdisc will
+allocate the number of bands to equal the number of queues that the device
+reports, and bring the qdisc online.
+
+The behavior of tc filters remains the same, where it will override TOS priority
+classification.
+
+
+Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
-- 
cgit v0.10.2


From f25f4e44808f0f6c9875d94ef1c41ef86c288eb2 Mon Sep 17 00:00:00 2001
From: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Date: Fri, 6 Jul 2007 13:36:20 -0700
Subject: [CORE] Stack changes to add multiqueue hardware support API

Add the multiqueue hardware device support API to the core network
stack.  Allow drivers to allocate multiple queues and manage them at
the netdev level if they choose to do so.

Added a new field to sk_buff, namely queue_mapping, for drivers to
know which tx_ring to select based on OS classification of the flow.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c251cca..d4e39ff 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -25,6 +25,14 @@ menuconfig NETDEVICES
 # that for each of the symbols.
 if NETDEVICES
 
+config NETDEVICES_MULTIQUEUE
+	bool "Netdevice multiple hardware queue support"
+	---help---
+	  Say Y here if you want to allow the network stack to use multiple
+	  hardware TX queues on an ethernet device.
+
+	  Most people will say N here.
+
 config IFB
 	tristate "Intermediate Functional Block support"
 	depends on NET_CLS_ACT
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f48eb89..6cdb973 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -39,7 +39,8 @@ extern void		eth_header_cache_update(struct hh_cache *hh, struct net_device *dev
 extern int		eth_header_cache(struct neighbour *neigh,
 					 struct hh_cache *hh);
 
-extern struct net_device *alloc_etherdev(int sizeof_priv);
+extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count);
+#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2c0cc19..9817821 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -108,6 +108,14 @@ struct wireless_dev;
 #define MAX_HEADER (LL_MAX_HEADER + 48)
 #endif
 
+struct net_device_subqueue
+{
+	/* Give a control state for each queue.  This struct may contain
+	 * per-queue locks in the future.
+	 */
+	unsigned long   state;
+};
+
 /*
  *	Network device statistics. Akin to the 2.0 ether stats but
  *	with byte counters.
@@ -331,6 +339,7 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
 #define NETIF_F_GSO		2048	/* Enable software GSO. */
 #define NETIF_F_LLTX		4096	/* LockLess TX */
+#define NETIF_F_MULTI_QUEUE	16384	/* Has multiple TX/RX queues */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
@@ -557,6 +566,10 @@ struct net_device
 
 	/* rtnetlink link ops */
 	const struct rtnl_link_ops *rtnl_link_ops;
+
+	/* The TX queue control structures */
+	unsigned int			egress_subqueue_count;
+	struct net_device_subqueue	egress_subqueue[0];
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -565,9 +578,7 @@ struct net_device
 
 static inline void *netdev_priv(const struct net_device *dev)
 {
-	return (char *)dev + ((sizeof(struct net_device)
-					+ NETDEV_ALIGN_CONST)
-				& ~NETDEV_ALIGN_CONST);
+	return dev->priv;
 }
 
 #define SET_MODULE_OWNER(dev) do { } while (0)
@@ -719,6 +730,62 @@ static inline int netif_running(const struct net_device *dev)
 	return test_bit(__LINK_STATE_START, &dev->state);
 }
 
+/*
+ * Routines to manage the subqueues on a device.  We only need start
+ * stop, and a check if it's stopped.  All other device management is
+ * done at the overall netdevice level.
+ * Also test the device if we're multiqueue.
+ */
+static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+#endif
+}
+
+static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#ifdef CONFIG_NETPOLL_TRAP
+	if (netpoll_trap())
+		return;
+#endif
+	set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+#endif
+}
+
+static inline int netif_subqueue_stopped(const struct net_device *dev,
+					 u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	return test_bit(__LINK_STATE_XOFF,
+			&dev->egress_subqueue[queue_index].state);
+#else
+	return 0;
+#endif
+}
+
+static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#ifdef CONFIG_NETPOLL_TRAP
+	if (netpoll_trap())
+		return;
+#endif
+	if (test_and_clear_bit(__LINK_STATE_XOFF,
+			       &dev->egress_subqueue[queue_index].state))
+		__netif_schedule(dev);
+#endif
+}
+
+static inline int netif_is_multiqueue(const struct net_device *dev)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	return (!!(NETIF_F_MULTI_QUEUE & dev->features));
+#else
+	return 0;
+#endif
+}
 
 /* Use this variant when it is known for sure that it
  * is executing from interrupt context.
@@ -1009,8 +1076,11 @@ static inline void netif_tx_disable(struct net_device *dev)
 extern void		ether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev(int sizeof_priv, const char *name,
-				       void (*setup)(struct net_device *));
+extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+				       void (*setup)(struct net_device *),
+				       unsigned int queue_count);
+#define alloc_netdev(sizeof_priv, name, setup) \
+	alloc_netdev_mq(sizeof_priv, name, setup, 1)
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
 /* Functions used for secondary unicast and multicast support */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 881fe80..2d6a14f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -196,7 +196,6 @@ typedef unsigned char *sk_buff_data_t;
  *	@sk: Socket we are owned by
  *	@tstamp: Time we arrived
  *	@dev: Device we arrived on/are leaving by
- *	@iif: ifindex of device we arrived on
  *	@transport_header: Transport layer header
  *	@network_header: Network layer header
  *	@mac_header: Link layer header
@@ -231,6 +230,8 @@ typedef unsigned char *sk_buff_data_t;
  *	@nfctinfo: Relationship of this skb to the connection
  *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
+ *	@iif: ifindex of device we arrived on
+ *	@queue_mapping: Queue mapping for multiqueue devices
  *	@tc_index: Traffic control index
  *	@tc_verd: traffic control verdict
  *	@dma_cookie: a cookie to one of several possible DMA operations
@@ -246,8 +247,6 @@ struct sk_buff {
 	struct sock		*sk;
 	ktime_t			tstamp;
 	struct net_device	*dev;
-	int			iif;
-	/* 4 byte hole on 64 bit*/
 
 	struct  dst_entry	*dst;
 	struct	sec_path	*sp;
@@ -290,12 +289,18 @@ struct sk_buff {
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct nf_bridge_info	*nf_bridge;
 #endif
+
+	int			iif;
+	__u16			queue_mapping;
+
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
 #ifdef CONFIG_NET_CLS_ACT
 	__u16			tc_verd;	/* traffic control verdict */
 #endif
 #endif
+	/* 2 byte hole */
+
 #ifdef CONFIG_NET_DMA
 	dma_cookie_t		dma_cookie;
 #endif
@@ -1725,6 +1730,20 @@ static inline void skb_init_secmark(struct sk_buff *skb)
 { }
 #endif
 
+static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	skb->queue_mapping = queue_mapping;
+#endif
+}
+
+static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	to->queue_mapping = from->queue_mapping;
+#endif
+}
+
 static inline int skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff --git a/net/core/dev.c b/net/core/dev.c
index 6dce9d2..7ddf66d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1429,7 +1429,9 @@ gso:
 			skb->next = nskb;
 			return rc;
 		}
-		if (unlikely(netif_queue_stopped(dev) && skb->next))
+		if (unlikely((netif_queue_stopped(dev) ||
+			     netif_subqueue_stopped(dev, skb->queue_mapping)) &&
+			     skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
 
@@ -1547,6 +1549,8 @@ gso:
 		spin_lock(&dev->queue_lock);
 		q = dev->qdisc;
 		if (q->enqueue) {
+			/* reset queue_mapping to zero */
+			skb->queue_mapping = 0;
 			rc = q->enqueue(skb, q);
 			qdisc_run(dev);
 			spin_unlock(&dev->queue_lock);
@@ -1576,7 +1580,8 @@ gso:
 
 			HARD_TX_LOCK(dev, cpu);
 
-			if (!netif_queue_stopped(dev)) {
+			if (!netif_queue_stopped(dev) &&
+			    !netif_subqueue_stopped(dev, skb->queue_mapping)) {
 				rc = 0;
 				if (!dev_hard_start_xmit(skb, dev)) {
 					HARD_TX_UNLOCK(dev);
@@ -3539,16 +3544,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
 }
 
 /**
- *	alloc_netdev - allocate network device
+ *	alloc_netdev_mq - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
  *	@name:		device name format string
  *	@setup:		callback to initialize device
+ *	@queue_count:	the number of subqueues to allocate
  *
  *	Allocates a struct net_device with private data area for driver use
- *	and performs basic initialization.
+ *	and performs basic initialization.  Also allocates subqueue structs
+ *	for each queue on the device at the end of the netdevice.
  */
-struct net_device *alloc_netdev(int sizeof_priv, const char *name,
-		void (*setup)(struct net_device *))
+struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *), unsigned int queue_count)
 {
 	void *p;
 	struct net_device *dev;
@@ -3557,7 +3564,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
 	/* ensure 32-byte alignment of both the device and private area */
-	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
+		     (sizeof(struct net_device_subqueue) * queue_count)) &
+		     ~NETDEV_ALIGN_CONST;
 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);
@@ -3570,15 +3579,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
 	dev->padded = (char *)dev - (char *)p;
 
-	if (sizeof_priv)
-		dev->priv = netdev_priv(dev);
+	if (sizeof_priv) {
+		dev->priv = ((char *)dev +
+			     ((sizeof(struct net_device) +
+			       (sizeof(struct net_device_subqueue) *
+				queue_count) + NETDEV_ALIGN_CONST)
+			      & ~NETDEV_ALIGN_CONST));
+	}
+
+	dev->egress_subqueue_count = queue_count;
 
 	dev->get_stats = internal_stats;
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
 }
-EXPORT_SYMBOL(alloc_netdev);
+EXPORT_SYMBOL(alloc_netdev_mq);
 
 /**
  *	free_netdev - free network device
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a0efdd7..4b06d19 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -66,8 +66,9 @@ static void queue_process(struct work_struct *work)
 
 		local_irq_save(flags);
 		netif_tx_lock(dev);
-		if (netif_queue_stopped(dev) ||
-		    dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+		if ((netif_queue_stopped(dev) ||
+		     netif_subqueue_stopped(dev, skb->queue_mapping)) ||
+		     dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			netif_tx_unlock(dev);
 			local_irq_restore(flags);
@@ -254,7 +255,8 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
 			if (netif_tx_trylock(dev)) {
-				if (!netif_queue_stopped(dev))
+				if (!netif_queue_stopped(dev) &&
+				    !netif_subqueue_stopped(dev, skb->queue_mapping))
 					status = dev->hard_start_xmit(skb, dev);
 				netif_tx_unlock(dev);
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9cd3a1c..dffe067 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3139,7 +3139,9 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		}
 	}
 
-	if (netif_queue_stopped(odev) || need_resched()) {
+	if ((netif_queue_stopped(odev) ||
+	     netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) ||
+	     need_resched()) {
 		idle_start = getCurUs();
 
 		if (!netif_running(odev)) {
@@ -3154,7 +3156,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 		pkt_dev->idle_acc += getCurUs() - idle_start;
 
-		if (netif_queue_stopped(odev)) {
+		if (netif_queue_stopped(odev) ||
+		    netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) {
 			pkt_dev->next_tx_us = getCurUs();	/* TODO */
 			pkt_dev->next_tx_ns = 0;
 			goto out;	/* Try the next interface */
@@ -3181,7 +3184,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	}
 
 	netif_tx_lock_bh(odev);
-	if (!netif_queue_stopped(odev)) {
+	if (!netif_queue_stopped(odev) &&
+	    !netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) {
 
 		atomic_inc(&(pkt_dev->skb->users));
 	      retry_now:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c989c3a..6a41b96 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -419,6 +419,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	n->nohdr = 0;
 	C(pkt_type);
 	C(ip_summed);
+	skb_copy_queue_mapping(n, skb);
 	C(priority);
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	C(ipvs_property);
@@ -460,6 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #endif
 	new->sk		= NULL;
 	new->dev	= old->dev;
+	skb_copy_queue_mapping(new, old);
 	new->priority	= old->priority;
 	new->protocol	= old->protocol;
 	new->dst	= dst_clone(old->dst);
@@ -1932,6 +1934,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		tail = nskb;
 
 		nskb->dev = skb->dev;
+		skb_copy_queue_mapping(nskb, skb);
 		nskb->priority = skb->priority;
 		nskb->protocol = skb->protocol;
 		nskb->dst = dst_clone(skb->dst);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 0ac2524..1387e54 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -316,9 +316,10 @@ void ether_setup(struct net_device *dev)
 EXPORT_SYMBOL(ether_setup);
 
 /**
- * alloc_etherdev - Allocates and sets up an Ethernet device
+ * alloc_etherdev_mq - Allocates and sets up an Ethernet device
  * @sizeof_priv: Size of additional driver-private structure to be allocated
  *	for this Ethernet device
+ * @queue_count: The number of queues this device has.
  *
  * Fill in the fields of the device structure with Ethernet-generic
  * values. Basically does everything except registering the device.
@@ -328,8 +329,8 @@ EXPORT_SYMBOL(ether_setup);
  * this private data area.
  */
 
-struct net_device *alloc_etherdev(int sizeof_priv)
+struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
 {
-	return alloc_netdev(sizeof_priv, "eth%d", ether_setup);
+	return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count);
 }
-EXPORT_SYMBOL(alloc_etherdev);
+EXPORT_SYMBOL(alloc_etherdev_mq);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index f05ad9a..dfe7e45 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -277,6 +277,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 	int busy;
 	int nores;
 	int len = skb->len;
+	int subq = skb->queue_mapping;
 	struct sk_buff *skb_res = NULL;
 
 	start = master->slaves;
@@ -293,7 +294,9 @@ restart:
 
 		if (slave->qdisc_sleeping != q)
 			continue;
-		if (netif_queue_stopped(slave) || ! netif_running(slave)) {
+		if (netif_queue_stopped(slave) ||
+		    netif_subqueue_stopped(slave, subq) ||
+		    !netif_running(slave)) {
 			busy = 1;
 			continue;
 		}
@@ -302,6 +305,7 @@ restart:
 		case 0:
 			if (netif_tx_trylock(slave)) {
 				if (!netif_queue_stopped(slave) &&
+				    !netif_subqueue_stopped(slave, subq) &&
 				    slave->hard_start_xmit(skb, slave) == 0) {
 					netif_tx_unlock(slave);
 					master->slaves = NEXT_SLAVE(q);
-- 
cgit v0.10.2


From d62733c8e437fdb58325617c4b3331769ba82d70 Mon Sep 17 00:00:00 2001
From: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Date: Thu, 28 Jun 2007 21:04:31 -0700
Subject: [SCHED]: Qdisc changes and sch_rr added for multiqueue

Add the new sch_rr qdisc for multiqueue network device support.  Allow
sch_prio and sch_rr to be compiled with or without multiqueue hardware
support.

sch_rr is part of sch_prio, and is referenced from MODULE_ALIAS.  This
was done since sch_prio and sch_rr only differ in their dequeue
routine.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d10f353..268c515 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -101,6 +101,15 @@ struct tc_prio_qopt
 	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
 };
 
+enum
+{
+	TCA_PRIO_UNSPEC,
+	TCA_PRIO_MQ,
+	__TCA_PRIO_MAX
+};
+
+#define TCA_PRIO_MAX    (__TCA_PRIO_MAX - 1)
+
 /* TBF section */
 
 struct tc_tbf_qopt
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 475df84..f321794 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -111,6 +111,17 @@ config NET_SCH_PRIO
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_prio.
 
+config NET_SCH_RR
+	tristate "Multi Band Round Robin Queuing (RR)"
+	select NET_SCH_PRIO
+	---help---
+	  Say Y here if you want to use an n-band round robin packet
+	  scheduler.
+
+	  The module uses sch_prio for its framework and is aliased as
+	  sch_rr, so it will load sch_prio, although it is referred
+	  to using sch_rr.
+
 config NET_SCH_RED
 	tristate "Random Early Detection (RED)"
 	---help---
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 6d7542c..4045220 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -40,9 +40,11 @@
 struct prio_sched_data
 {
 	int bands;
+	int curband; /* for round-robin */
 	struct tcf_proto *filter_list;
 	u8  prio2band[TC_PRIO_MAX+1];
 	struct Qdisc *queues[TCQ_PRIO_BANDS];
+	int mq;
 };
 
 
@@ -70,14 +72,17 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 #endif
 			if (TC_H_MAJ(band))
 				band = 0;
-			return q->queues[q->prio2band[band&TC_PRIO_MAX]];
+			band = q->prio2band[band&TC_PRIO_MAX];
+			goto out;
 		}
 		band = res.classid;
 	}
 	band = TC_H_MIN(band) - 1;
 	if (band >= q->bands)
-		return q->queues[q->prio2band[0]];
-
+		band = q->prio2band[0];
+out:
+	if (q->mq)
+		skb_set_queue_mapping(skb, band);
 	return q->queues[band];
 }
 
@@ -144,17 +149,58 @@ prio_dequeue(struct Qdisc* sch)
 	struct Qdisc *qdisc;
 
 	for (prio = 0; prio < q->bands; prio++) {
-		qdisc = q->queues[prio];
-		skb = qdisc->dequeue(qdisc);
-		if (skb) {
-			sch->q.qlen--;
-			return skb;
+		/* Check if the target subqueue is available before
+		 * pulling an skb.  This way we avoid excessive requeues
+		 * for slower queues.
+		 */
+		if (!netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) {
+			qdisc = q->queues[prio];
+			skb = qdisc->dequeue(qdisc);
+			if (skb) {
+				sch->q.qlen--;
+				return skb;
+			}
 		}
 	}
 	return NULL;
 
 }
 
+static struct sk_buff *rr_dequeue(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+	struct prio_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *qdisc;
+	int bandcount;
+
+	/* Only take one pass through the queues.  If nothing is available,
+	 * return nothing.
+	 */
+	for (bandcount = 0; bandcount < q->bands; bandcount++) {
+		/* Check if the target subqueue is available before
+		 * pulling an skb.  This way we avoid excessive requeues
+		 * for slower queues.  If the queue is stopped, try the
+		 * next queue.
+		 */
+		if (!netif_subqueue_stopped(sch->dev,
+					    (q->mq ? q->curband : 0))) {
+			qdisc = q->queues[q->curband];
+			skb = qdisc->dequeue(qdisc);
+			if (skb) {
+				sch->q.qlen--;
+				q->curband++;
+				if (q->curband >= q->bands)
+					q->curband = 0;
+				return skb;
+			}
+		}
+		q->curband++;
+		if (q->curband >= q->bands)
+			q->curband = 0;
+	}
+	return NULL;
+}
+
 static unsigned int prio_drop(struct Qdisc* sch)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
@@ -198,21 +244,41 @@ prio_destroy(struct Qdisc* sch)
 static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
-	struct tc_prio_qopt *qopt = RTA_DATA(opt);
+	struct tc_prio_qopt *qopt;
+	struct rtattr *tb[TCA_PRIO_MAX];
 	int i;
 
-	if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
+	if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt,
+				       sizeof(*qopt)))
 		return -EINVAL;
-	if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
+	q->bands = qopt->bands;
+	/* If we're multiqueue, make sure the number of incoming bands
+	 * matches the number of queues on the device we're associating with.
+	 * If the number of bands requested is zero, then set q->bands to
+	 * dev->egress_subqueue_count.
+	 */
+	q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]);
+	if (q->mq) {
+		if (sch->handle != TC_H_ROOT)
+			return -EINVAL;
+		if (netif_is_multiqueue(sch->dev)) {
+			if (q->bands == 0)
+				q->bands = sch->dev->egress_subqueue_count;
+			else if (q->bands != sch->dev->egress_subqueue_count)
+				return -EINVAL;
+		} else
+			return -EOPNOTSUPP;
+	}
+
+	if (q->bands > TCQ_PRIO_BANDS || q->bands < 2)
 		return -EINVAL;
 
 	for (i=0; i<=TC_PRIO_MAX; i++) {
-		if (qopt->priomap[i] >= qopt->bands)
+		if (qopt->priomap[i] >= q->bands)
 			return -EINVAL;
 	}
 
 	sch_tree_lock(sch);
-	q->bands = qopt->bands;
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
 	for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
@@ -268,11 +334,17 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
+	struct rtattr *nest;
 	struct tc_prio_qopt opt;
 
 	opt.bands = q->bands;
 	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
-	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	if (q->mq)
+		RTA_PUT_FLAG(skb, TCA_PRIO_MQ);
+	RTA_NEST_COMPAT_END(skb, nest);
+
 	return skb->len;
 
 rtattr_failure:
@@ -443,17 +515,44 @@ static struct Qdisc_ops prio_qdisc_ops = {
 	.owner		=	THIS_MODULE,
 };
 
+static struct Qdisc_ops rr_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&prio_class_ops,
+	.id		=	"rr",
+	.priv_size	=	sizeof(struct prio_sched_data),
+	.enqueue	=	prio_enqueue,
+	.dequeue	=	rr_dequeue,
+	.requeue	=	prio_requeue,
+	.drop		=	prio_drop,
+	.init		=	prio_init,
+	.reset		=	prio_reset,
+	.destroy	=	prio_destroy,
+	.change		=	prio_tune,
+	.dump		=	prio_dump,
+	.owner		=	THIS_MODULE,
+};
+
 static int __init prio_module_init(void)
 {
-	return register_qdisc(&prio_qdisc_ops);
+	int err;
+
+	err = register_qdisc(&prio_qdisc_ops);
+	if (err < 0)
+		return err;
+	err = register_qdisc(&rr_qdisc_ops);
+	if (err < 0)
+		unregister_qdisc(&prio_qdisc_ops);
+	return err;
 }
 
 static void __exit prio_module_exit(void)
 {
 	unregister_qdisc(&prio_qdisc_ops);
+	unregister_qdisc(&rr_qdisc_ops);
 }
 
 module_init(prio_module_init)
 module_exit(prio_module_exit)
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("sch_rr");
-- 
cgit v0.10.2


From 61cbc2fca6335be52788773b21efdc52a2750924 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 30 Jun 2007 13:35:52 -0700
Subject: [NET]: Fix secondary unicast/multicast address count maintenance

When a reference to an existing address is increased or decreased without
hitting zero, the address count is incorrectly adjusted.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9817821..8590d68 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1091,8 +1091,8 @@ extern int		dev_unicast_add(struct net_device *dev, void *addr, int alen);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern void		dev_mc_discard(struct net_device *dev);
-extern int 		__dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all);
-extern int		__dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly);
+extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
+extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
 extern void		__dev_addr_discard(struct dev_addr_list **list);
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ddf66d..4221dcd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2607,8 +2607,8 @@ void dev_set_rx_mode(struct net_device *dev)
 	netif_tx_unlock_bh(dev);
 }
 
-int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen,
-		      int glbl)
+int __dev_addr_delete(struct dev_addr_list **list, int *count,
+		      void *addr, int alen, int glbl)
 {
 	struct dev_addr_list *da;
 
@@ -2626,13 +2626,15 @@ int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen,
 
 			*list = da->next;
 			kfree(da);
+			(*count)--;
 			return 0;
 		}
 	}
 	return -ENOENT;
 }
 
-int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl)
+int __dev_addr_add(struct dev_addr_list **list, int *count,
+		   void *addr, int alen, int glbl)
 {
 	struct dev_addr_list *da;
 
@@ -2659,6 +2661,7 @@ int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl)
 	da->da_gusers = glbl ? 1 : 0;
 	da->next = *list;
 	*list = da;
+	(*count)++;
 	return 0;
 }
 
@@ -2692,11 +2695,9 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
 	ASSERT_RTNL();
 
 	netif_tx_lock_bh(dev);
-	err = __dev_addr_delete(&dev->uc_list, addr, alen, 0);
-	if (!err) {
-		dev->uc_count--;
+	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+	if (!err)
 		__dev_set_rx_mode(dev);
-	}
 	netif_tx_unlock_bh(dev);
 	return err;
 }
@@ -2718,11 +2719,9 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
 	ASSERT_RTNL();
 
 	netif_tx_lock_bh(dev);
-	err = __dev_addr_add(&dev->uc_list, addr, alen, 0);
-	if (!err) {
-		dev->uc_count++;
+	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+	if (!err)
 		__dev_set_rx_mode(dev);
-	}
 	netif_tx_unlock_bh(dev);
 	return err;
 }
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 5cc9b44..aa38100 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -72,10 +72,9 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 	int err;
 
 	netif_tx_lock_bh(dev);
-	err = __dev_addr_delete(&dev->mc_list, addr, alen, glbl);
+	err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
+				addr, alen, glbl);
 	if (!err) {
-		dev->mc_count--;
-
 		/*
 		 *	We have altered the list, so the card
 		 *	loaded filter is now wrong. Fix it
@@ -96,11 +95,9 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 	int err;
 
 	netif_tx_lock_bh(dev);
-	err = __dev_addr_add(&dev->mc_list, addr, alen, glbl);
-	if (!err) {
-		dev->mc_count++;
+	err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
+	if (!err)
 		__dev_set_rx_mode(dev);
-	}
 	netif_tx_unlock_bh(dev);
 	return err;
 }
-- 
cgit v0.10.2


From d0410051164bbbc597e15f068b53c06a954ae0d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 2 Jul 2007 22:07:22 -0700
Subject: [TCP]: SACK fastpath did override adjusted fackets_out
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Do same adjustment to SACK fastpath counters provided that
they're valid.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 53232dd..20aea15 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -699,6 +699,14 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 			tp->fackets_out -= diff;
 			if ((int)tp->fackets_out < 0)
 				tp->fackets_out = 0;
+			/* SACK fastpath might overwrite it unless dealt with */
+			if (tp->fastpath_skb_hint != NULL &&
+			    after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq,
+				  TCP_SKB_CB(skb)->seq)) {
+				tp->fastpath_cnt_hint -= diff;
+				if ((int)tp->fastpath_cnt_hint < 0)
+					tp->fastpath_cnt_hint = 0;
+			}
 		}
 	}
 
-- 
cgit v0.10.2


From eef6caf8a916f32f8d9b2a02d4fa7674736c00ac Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Mon, 2 Jul 2007 22:36:38 -0700
Subject: [MAC80211]: Set low initial rate in rc80211_simple

The initial rate for STA's using rc80211_simple is set to the last
rate in the rate table. For situations for which the signal is weak,
the rate may be too high for authentication and association. Although
the rc80211_simple module will adjust the speed, the response may not
be fast enough for a successful connection. This modification sets the
initial rate to the lowest supported value.

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c
index 2048cfd..5ae7fc4 100644
--- a/net/mac80211/rc80211_simple.c
+++ b/net/mac80211/rc80211_simple.c
@@ -283,14 +283,16 @@ static void rate_control_simple_rate_init(void *priv, void *priv_sta,
 	int i;
 	sta->txrate = 0;
 	mode = local->oper_hw_mode;
-	/* TODO: what is a good starting rate for STA? About middle? Maybe not
-	 * the lowest or the highest rate.. Could consider using RSSI from
-	 * previous packets? Need to have IEEE 802.1X auth succeed immediately
-	 * after assoc.. */
+	/* TODO: This routine should consider using RSSI from previous packets
+	 * as we need to have IEEE 802.1X auth succeed immediately after assoc..
+	 * Until that method is implemented, we will use the lowest supported rate
+	 * as a workaround. */
 	for (i = 0; i < mode->num_rates; i++) {
 		if ((sta->supp_rates & BIT(i)) &&
-		    (mode->rates[i].flags & IEEE80211_RATE_SUPPORTED))
+		    (mode->rates[i].flags & IEEE80211_RATE_SUPPORTED)) {
 			sta->txrate = i;
+			break;
+		}
 	}
 }
 
-- 
cgit v0.10.2


From 16dab72f65a6aab0aa72866e00c91b58a2794082 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Mon, 2 Jul 2007 22:39:50 -0700
Subject: [PKTGEN]: Centralize packet overhead tracking

Track the extra packet overhead for VLAN tags, MPLS, IPSEC etc

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index dffe067..9f0a780 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -228,6 +228,7 @@ struct pktgen_dev {
 
 	int min_pkt_size;	/* = ETH_ZLEN; */
 	int max_pkt_size;	/* = ETH_ZLEN; */
+	int pkt_overhead;	/* overhead for MPLS, VLANs, IPSEC etc */
 	int nfrags;
 	__u32 delay_us;		/* Default delay */
 	__u32 delay_ns;
@@ -2075,6 +2076,13 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
 	pkt_dev->idle_acc += now - start;
 }
 
+static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
+{
+	pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32);
+	pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
+	pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
+}
+
 /* Increment/randomize headers according to flags and current values
  * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
  */
@@ -2323,9 +2331,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 
 	datalen = (odev->hard_header_len + 16) & ~0xf;
 	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen +
-			pkt_dev->nr_labels*sizeof(u32) +
-			VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev),
-			GFP_ATOMIC);
+			pkt_dev->pkt_overhead, GFP_ATOMIC);
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
@@ -2368,7 +2374,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 
 	/* Eth + IPh + UDPh + mpls */
 	datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 -
-		  pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+		  pkt_dev->pkt_overhead;
 	if (datalen < sizeof(struct pktgen_hdr))
 		datalen = sizeof(struct pktgen_hdr);
 
@@ -2391,8 +2397,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	iph->check = ip_fast_csum((void *)iph, iph->ihl);
 	skb->protocol = protocol;
 	skb->mac_header = (skb->network_header - ETH_HLEN -
-			   pkt_dev->nr_labels * sizeof(u32) -
-			   VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
+			   pkt_dev->pkt_overhead);
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
 
@@ -2662,9 +2667,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	mod_cur_headers(pkt_dev);
 
 	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 +
-			pkt_dev->nr_labels*sizeof(u32) +
-			VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev),
-			GFP_ATOMIC);
+			pkt_dev->pkt_overhead, GFP_ATOMIC);
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
@@ -2708,7 +2711,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	/* Eth + IPh + UDPh + mpls */
 	datalen = pkt_dev->cur_pkt_size - 14 -
 		  sizeof(struct ipv6hdr) - sizeof(struct udphdr) -
-		  pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+		  pkt_dev->pkt_overhead;
 
 	if (datalen < sizeof(struct pktgen_hdr)) {
 		datalen = sizeof(struct pktgen_hdr);
@@ -2738,8 +2741,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
 
 	skb->mac_header = (skb->network_header - ETH_HLEN -
-			   pkt_dev->nr_labels * sizeof(u32) -
-			   VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
+			   pkt_dev->pkt_overhead);
 	skb->protocol = protocol;
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
@@ -2857,6 +2859,7 @@ static void pktgen_run(struct pktgen_thread *t)
 			pkt_dev->started_at = getCurUs();
 			pkt_dev->next_tx_us = getCurUs();	/* Transmit immediately */
 			pkt_dev->next_tx_ns = 0;
+			set_pkt_overhead(pkt_dev);
 
 			strcpy(pkt_dev->result, "Starting");
 			started++;
-- 
cgit v0.10.2


From 007a531b0a0c902392a3deff730acd28ce6625c7 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Mon, 2 Jul 2007 22:40:36 -0700
Subject: [PKTGEN]: Introduce sequential flows

By default all flows in pktgen are randomly selected.
This patch introduces ability to have all defined flows to
be sent sequentially. Robert defined randomness to be the
default behavior.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9f0a780..683da70 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -181,6 +181,7 @@
 #define F_MPLS_RND    (1<<8)	/* Random MPLS labels */
 #define F_VID_RND     (1<<9)	/* Random VLAN ID */
 #define F_SVID_RND    (1<<10)	/* Random SVLAN ID */
+#define F_FLOW_SEQ    (1<<11)	/* Sequential flows */
 
 /* Thread control flag bits */
 #define T_TERMINATE   (1<<0)
@@ -207,8 +208,12 @@ static struct proc_dir_entry *pg_proc_dir = NULL;
 struct flow_state {
 	__be32 cur_daddr;
 	int count;
+	__u32 flags;
 };
 
+/* flow flag bits */
+#define F_INIT   (1<<0)		/* flow has been initialized */
+
 struct pktgen_dev {
 	/*
 	 * Try to keep frequent/infrequent used vars. separated.
@@ -342,6 +347,7 @@ struct pktgen_dev {
 	unsigned cflows;	/* Concurrent flows (config) */
 	unsigned lflow;		/* Flow length  (config) */
 	unsigned nflows;	/* accumulated flows (stats) */
+	unsigned curfl;		/* current sequenced flow (state)*/
 
 	char result[512];
 };
@@ -691,6 +697,13 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	if (pkt_dev->flags & F_MPLS_RND)
 		seq_printf(seq,  "MPLS_RND  ");
 
+	if (pkt_dev->cflows) {
+		if (pkt_dev->flags & F_FLOW_SEQ)
+			seq_printf(seq,  "FLOW_SEQ  "); /*in sequence flows*/
+		else
+			seq_printf(seq,  "FLOW_RND  ");
+	}
+
 	if (pkt_dev->flags & F_MACSRC_RND)
 		seq_printf(seq, "MACSRC_RND  ");
 
@@ -1182,6 +1195,9 @@ static ssize_t pktgen_if_write(struct file *file,
 		else if (strcmp(f, "!SVID_RND") == 0)
 			pkt_dev->flags &= ~F_SVID_RND;
 
+		else if (strcmp(f, "FLOW_SEQ") == 0)
+			pkt_dev->flags |= F_FLOW_SEQ;
+
 		else if (strcmp(f, "!IPV6") == 0)
 			pkt_dev->flags &= ~F_IPV6;
 
@@ -1190,7 +1206,7 @@ static ssize_t pktgen_if_write(struct file *file,
 				"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
 				f,
 				"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
-				"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND\n");
+				"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ\n");
 			return count;
 		}
 		sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -2083,6 +2099,37 @@ static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
 	pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
 }
 
+/* Tell whether a flow slot has already been set up: 1 if its F_INIT
+ * flag is set, 0 otherwise. */
+static inline int f_seen(struct pktgen_dev *pkt_dev, int flow)
+{
+	__u32 fl_flags = pkt_dev->flows[flow].flags;
+
+	return (fl_flags & F_INIT) ? 1 : 0;
+}
+
+/* Choose the flow for the next packet and leave its index in curfl. */
+static inline int f_pick(struct pktgen_dev *pkt_dev)
+{
+	int flow = pkt_dev->curfl;
+
+	if (pkt_dev->flags & F_FLOW_SEQ) {
+		if (pkt_dev->flows[flow].count >= pkt_dev->lflow) {
+			/* reset time */
+			pkt_dev->flows[flow].count = 0;
+			pkt_dev->curfl += 1;
+			if (pkt_dev->curfl >= pkt_dev->cflows)
+				pkt_dev->curfl = 0; /*reset */
+		}
+	} else {
+		flow = random32() % pkt_dev->cflows;
+		pkt_dev->curfl = flow;	/* else the random pick is never used */
+		if (pkt_dev->flows[flow].count > pkt_dev->lflow)
+			pkt_dev->flows[flow].count = 0;
+	}
+	return pkt_dev->curfl;
+}
+
 /* Increment/randomize headers according to flags and current values
  * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
  */
@@ -2092,12 +2139,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 	__u32 imx;
 	int flow = 0;
 
-	if (pkt_dev->cflows) {
-		flow = random32() % pkt_dev->cflows;
-
-		if (pkt_dev->flows[flow].count > pkt_dev->lflow)
-			pkt_dev->flows[flow].count = 0;
-	}
+	if (pkt_dev->cflows)
+		flow = f_pick(pkt_dev);
 
 	/*  Deal with source MAC */
 	if (pkt_dev->src_mac_count > 1) {
@@ -2213,7 +2256,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 			pkt_dev->cur_saddr = htonl(t);
 		}
 
-		if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) {
+		if (pkt_dev->cflows && f_seen(pkt_dev, flow)) {
 			pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr;
 		} else {
 			imn = ntohl(pkt_dev->daddr_min);
@@ -2243,6 +2286,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 				}
 			}
 			if (pkt_dev->cflows) {
+				pkt_dev->flows[flow].flags |= F_INIT;
 				pkt_dev->flows[flow].cur_daddr =
 				    pkt_dev->cur_daddr;
 				pkt_dev->nflows++;
-- 
cgit v0.10.2


From 628529b6ee334fedc8d25ce56205bb99566572b9 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Mon, 2 Jul 2007 22:41:14 -0700
Subject: [XFRM] Introduce standalone SAD lookup

This allows other in-kernel functions to do SAD lookups.
The only known user at the moment is pktgen.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index ee3827f..d3a898b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -928,6 +928,10 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t
 					  struct flowi *fl, struct xfrm_tmpl *tmpl,
 					  struct xfrm_policy *pol, int *err,
 					  unsigned short family);
+extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr,
+					       xfrm_address_t *saddr,
+					       unsigned short family,
+					       u8 mode, u8 proto, u32 reqid);
 extern int xfrm_state_check_expire(struct xfrm_state *x);
 extern void xfrm_state_insert(struct xfrm_state *x);
 extern int xfrm_state_add(struct xfrm_state *x);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index dfacb9c..e070c3f 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -686,6 +686,37 @@ out:
 	return x;
 }
 
+/* Standalone SAD lookup by addresses, family, mode, proto and reqid.
+ * A matching state is returned with a reference held, else NULL.
+ */
+struct xfrm_state *
+xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
+		    unsigned short family, u8 mode, u8 proto, u32 reqid)
+{
+	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
+	struct xfrm_state *x, *found = NULL;
+	struct hlist_node *pos;
+
+	spin_lock(&xfrm_state_lock);
+	hlist_for_each_entry(x, pos, xfrm_state_bydst+h, bydst) {
+		if (x->props.family != family ||
+		    x->props.reqid != reqid ||
+		    (x->props.flags & XFRM_STATE_WILDRECV) ||
+		    !xfrm_state_addr_check(x, daddr, saddr, family) ||
+		    mode != x->props.mode ||
+		    proto != x->id.proto ||
+		    x->km.state != XFRM_STATE_VALID)
+			continue;
+		xfrm_state_hold(x);
+		found = x;
+		break;
+	}
+	spin_unlock(&xfrm_state_lock);
+
+	return found;
+}
+EXPORT_SYMBOL(xfrm_stateonly_find);
+
 static void __xfrm_state_insert(struct xfrm_state *x)
 {
 	unsigned int h;
-- 
cgit v0.10.2


From a553e4a6317b2cfc7659542c10fe43184ffe53da Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Mon, 2 Jul 2007 22:41:59 -0700
Subject: [PKTGEN]: IPSEC support

Added transport mode ESP support for starters.  I will send more of
these modes and types once I have resolved the tunnel mode issues.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 683da70..7521533 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -152,6 +152,9 @@
 #include <net/checksum.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
+#ifdef CONFIG_XFRM
+#include <net/xfrm.h>
+#endif
 #include <asm/byteorder.h>
 #include <linux/rcupdate.h>
 #include <asm/bitops.h>
@@ -182,6 +185,7 @@
 #define F_VID_RND     (1<<9)	/* Random VLAN ID */
 #define F_SVID_RND    (1<<10)	/* Random SVLAN ID */
 #define F_FLOW_SEQ    (1<<11)	/* Sequential flows */
+#define F_IPSEC_ON    (1<<12)	/* ipsec on for flows */
 
 /* Thread control flag bits */
 #define T_TERMINATE   (1<<0)
@@ -208,6 +212,9 @@ static struct proc_dir_entry *pg_proc_dir = NULL;
 struct flow_state {
 	__be32 cur_daddr;
 	int count;
+#ifdef CONFIG_XFRM
+	struct xfrm_state *x;
+#endif
 	__u32 flags;
 };
 
@@ -348,7 +355,10 @@ struct pktgen_dev {
 	unsigned lflow;		/* Flow length  (config) */
 	unsigned nflows;	/* accumulated flows (stats) */
 	unsigned curfl;		/* current sequenced flow (state)*/
-
+#ifdef CONFIG_XFRM
+	__u8	ipsmode;		/* IPSEC mode (config) */
+	__u8	ipsproto;		/* IPSEC type (config) */
+#endif
 	char result[512];
 };
 
@@ -704,6 +714,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 			seq_printf(seq,  "FLOW_RND  ");
 	}
 
+#ifdef CONFIG_XFRM
+	if (pkt_dev->flags & F_IPSEC_ON)
+		seq_printf(seq,  "IPSEC  ");
+#endif
+
 	if (pkt_dev->flags & F_MACSRC_RND)
 		seq_printf(seq, "MACSRC_RND  ");
 
@@ -1198,6 +1213,11 @@ static ssize_t pktgen_if_write(struct file *file,
 		else if (strcmp(f, "FLOW_SEQ") == 0)
 			pkt_dev->flags |= F_FLOW_SEQ;
 
+#ifdef CONFIG_XFRM
+		else if (strcmp(f, "IPSEC") == 0)
+			pkt_dev->flags |= F_IPSEC_ON;
+#endif
+
 		else if (strcmp(f, "!IPV6") == 0)
 			pkt_dev->flags &= ~F_IPV6;
 
@@ -1206,7 +1226,7 @@ static ssize_t pktgen_if_write(struct file *file,
 				"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
 				f,
 				"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
-				"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ\n");
+				"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n");
 			return count;
 		}
 		sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -2094,6 +2114,7 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
 {
+	pkt_dev->pkt_overhead = 0;
 	pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32);
 	pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
 	pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
@@ -2130,6 +2151,31 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
 	return pkt_dev->curfl;
 }
 
+
+#ifdef CONFIG_XFRM
+/* If there was already an IPSEC SA, we keep it as is, else
+ * we go look for it ... and, once found, cache it in the flow slot
+ * and add its header length to the recomputed packet overhead.
+ */
+static inline
+void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
+{
+	struct xfrm_state *x = pkt_dev->flows[flow].x;
+	if (!x) {
+		/*slow path: we dont already have xfrm_state*/
+		x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr,
+					(xfrm_address_t *)&pkt_dev->cur_saddr,
+					AF_INET,
+					pkt_dev->ipsmode,
+					pkt_dev->ipsproto, 0);
+		if (x) {
+			pkt_dev->flows[flow].x = x;
+			set_pkt_overhead(pkt_dev);
+			pkt_dev->pkt_overhead+=x->props.header_len;
+		}
+	}
+}
+#endif
 /* Increment/randomize headers according to flags and current values
  * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
  */
@@ -2289,6 +2335,10 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 				pkt_dev->flows[flow].flags |= F_INIT;
 				pkt_dev->flows[flow].cur_daddr =
 				    pkt_dev->cur_daddr;
+#ifdef CONFIG_XFRM
+				if (pkt_dev->flags & F_IPSEC_ON)
+					get_ipsec_sa(pkt_dev, flow);
+#endif
 				pkt_dev->nflows++;
 			}
 		}
@@ -2329,6 +2379,91 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 	pkt_dev->flows[flow].count++;
 }
 
+
+#ifdef CONFIG_XFRM
+/*
+ * Apply the current flow's SA (transport mode only) to skb under x->lock.
+ * Returns 0 on success or when nothing is done, else the xfrm output error.
+ */
+static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
+{
+	struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
+	int err = 0;
+
+	if (!x)
+		return 0;
+	/* XXX: we dont support tunnel mode for now until
+	 * we resolve the dst issue */
+	if (x->props.mode != XFRM_MODE_TRANSPORT)
+		return 0;
+
+	spin_lock(&x->lock);
+	err = x->mode->output(x, skb);
+	if (err)
+		goto error;
+	err = x->type->output(x, skb);
+	if (err)
+		goto error;
+
+	x->curlft.bytes +=skb->len;
+	x->curlft.packets++;
+	/* success falls through: x->lock must be dropped exactly once */
+error:
+	spin_unlock(&x->lock);
+	return err;
+}
+
+/* Release the SA reference held by each flow slot; slots are indexed
+ * below cflows, so cflows (not the nflows statistic) bounds the walk. */
+static inline void free_SAs(struct pktgen_dev *pkt_dev)
+{
+	unsigned int i;
+
+	for (i = 0; i < pkt_dev->cflows; i++) {
+		struct xfrm_state *x = pkt_dev->flows[i].x;
+		if (x) {
+			xfrm_state_put(x);
+			pkt_dev->flows[i].x = NULL;
+		}
+	}
+}
+
+/* Apply IPsec to skb if enabled; returns 1 if usable, 0 if freed on error */
+static inline int process_ipsec(struct pktgen_dev *pkt_dev,
+			      struct sk_buff *skb, __be16 protocol)
+{
+	if (pkt_dev->flags & F_IPSEC_ON) {
+		struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
+		if (x) {
+			int nhead, ret;
+			__u8 *eth;
+			nhead = x->props.header_len - skb_headroom(skb);
+			if (nhead >0) {
+				ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
+				if (ret < 0) {
+					printk("Error expanding ipsec packet %d\n",ret);
+					kfree_skb(skb); /* caller drops its pointer */
+					return 0;
+				}
+			}
+			/* ipsec is not expecting ll header */
+			skb_pull(skb, ETH_HLEN);
+			ret = pktgen_output_ipsec(skb, pkt_dev);
+			if (ret) {
+				printk("Error creating ipsec packet %d\n",ret);
+				kfree_skb(skb);
+				return 0;
+			}
+			/* restore ll */
+			eth = (__u8 *) skb_push(skb, ETH_HLEN);
+			memcpy(eth, pkt_dev->hh, 12);
+			*(u16 *) & eth[12] = protocol;
+		}
+	}
+	return 1;
+}
+#endif
+
 static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
 {
 	unsigned i;
@@ -2512,6 +2647,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 		pgh->tv_usec = htonl(timestamp.tv_usec);
 	}
 
+#ifdef CONFIG_XFRM
+	if (!process_ipsec(pkt_dev, skb, protocol))
+		return NULL;
+#endif
+
 	return skb;
 }
 
@@ -3497,11 +3637,18 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	}
 	pkt_dev->entry->proc_fops = &pktgen_if_fops;
 	pkt_dev->entry->data = pkt_dev;
+#ifdef CONFIG_XFRM
+	pkt_dev->ipsmode = XFRM_MODE_TRANSPORT;
+	pkt_dev->ipsproto = IPPROTO_ESP;
+#endif
 
 	return add_dev_to_thread(t, pkt_dev);
 out2:
 	dev_put(pkt_dev->odev);
 out1:
+#ifdef CONFIG_XFRM
+	free_SAs(pkt_dev);
+#endif
 	if (pkt_dev->flows)
 		vfree(pkt_dev->flows);
 	kfree(pkt_dev);
@@ -3596,6 +3743,9 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 	if (pkt_dev->entry)
 		remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
 
+#ifdef CONFIG_XFRM
+	free_SAs(pkt_dev);
+#endif
 	if (pkt_dev->flows)
 		vfree(pkt_dev->flows);
 	kfree(pkt_dev);
-- 
cgit v0.10.2


From 876d48aabf30e4981653f1a0a7ae1e262b8c8b6f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 2 Jul 2007 22:46:07 -0700
Subject: [NET_SCHED]: Remove CONFIG_NET_ESTIMATOR option

The generic estimator is always built in anyway and all the config option
does is prevent including a minimal amount of code for setting it up.
Additionally the option is already automatically selected for most cases.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f321794..b466288 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -286,7 +286,6 @@ config CLS_U32_MARK
 config NET_CLS_RSVP
 	tristate "IPv4 Resource Reservation Protocol (RSVP)"
 	select NET_CLS
-	select NET_ESTIMATOR
 	---help---
 	  The Resource Reservation Protocol (RSVP) permits end systems to
 	  request a minimum and maximum data flow rate for a connection; this
@@ -301,7 +300,6 @@ config NET_CLS_RSVP
 config NET_CLS_RSVP6
 	tristate "IPv6 Resource Reservation Protocol (RSVP6)"
 	select NET_CLS
-	select NET_ESTIMATOR
 	---help---
 	  The Resource Reservation Protocol (RSVP) permits end systems to
 	  request a minimum and maximum data flow rate for a connection; this
@@ -393,7 +391,6 @@ config NET_EMATCH_TEXT
 
 config NET_CLS_ACT
 	bool "Actions"
-	select NET_ESTIMATOR
 	---help---
 	  Say Y here if you want to use traffic control actions. Actions
 	  get attached to classifiers and are invoked after a successful
@@ -476,7 +473,6 @@ config NET_ACT_SIMP
 config NET_CLS_POLICE
 	bool "Traffic Policing (obsolete)"
 	depends on NET_CLS_ACT!=y
-	select NET_ESTIMATOR
 	---help---
 	  Say Y here if you want to do traffic policing, i.e. strict
 	  bandwidth limiting. This option is obsoleted by the traffic
@@ -491,14 +487,6 @@ config NET_CLS_IND
 	  classification based on the incoming device. This option is
 	  likely to disappear in favour of the metadata ematch.
 
-config NET_ESTIMATOR
-	bool "Rate estimator"
-	---help---
-	  Say Y here to allow using rate estimators to estimate the current
-	  rate-of-flow for network devices, queues, etc. This module is
-	  automatically selected if needed but can be selected manually for
-	  statistical purposes.
-
 endif # NET_SCHED
 
 endmenu
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 711dd26..72bb9bd 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -42,10 +42,8 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 			write_lock_bh(hinfo->lock);
 			*p1p = p->tcfc_next;
 			write_unlock_bh(hinfo->lock);
-#ifdef CONFIG_NET_ESTIMATOR
 			gen_kill_estimator(&p->tcfc_bstats,
 					   &p->tcfc_rate_est);
-#endif
 			kfree(p);
 			return;
 		}
@@ -236,11 +234,9 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti
 	p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
 	p->tcfc_tm.install = jiffies;
 	p->tcfc_tm.lastuse = jiffies;
-#ifdef CONFIG_NET_ESTIMATOR
 	if (est)
 		gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
 				  p->tcfc_stats_lock, est);
-#endif
 	a->priv = (void *) p;
 	return p;
 }
@@ -614,9 +610,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
 			goto errout;
 
 	if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
-#ifdef CONFIG_NET_ESTIMATOR
 	    gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 ||
-#endif
 	    gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
 		goto errout;
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 616f465..580698d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -118,10 +118,8 @@ void tcf_police_destroy(struct tcf_police *p)
 			write_lock_bh(&police_lock);
 			*p1p = p->tcf_next;
 			write_unlock_bh(&police_lock);
-#ifdef CONFIG_NET_ESTIMATOR
 			gen_kill_estimator(&p->tcf_bstats,
 					   &p->tcf_rate_est);
-#endif
 			if (p->tcfp_R_tab)
 				qdisc_put_rtab(p->tcfp_R_tab);
 			if (p->tcfp_P_tab)
@@ -227,7 +225,6 @@ override:
 		police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
 	police->tcf_action = parm->action;
 
-#ifdef CONFIG_NET_ESTIMATOR
 	if (tb[TCA_POLICE_AVRATE-1])
 		police->tcfp_ewma_rate =
 			*(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
@@ -235,7 +232,6 @@ override:
 		gen_replace_estimator(&police->tcf_bstats,
 				      &police->tcf_rate_est,
 				      police->tcf_stats_lock, est);
-#endif
 
 	spin_unlock_bh(&police->tcf_lock);
 	if (ret != ACT_P_CREATED)
@@ -281,14 +277,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 	police->tcf_bstats.bytes += skb->len;
 	police->tcf_bstats.packets++;
 
-#ifdef CONFIG_NET_ESTIMATOR
 	if (police->tcfp_ewma_rate &&
 	    police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
 		police->tcf_qstats.overlimits++;
 		spin_unlock(&police->tcf_lock);
 		return police->tcf_action;
 	}
-#endif
 
 	if (skb->len <= police->tcfp_mtu) {
 		if (police->tcfp_R_tab == NULL) {
@@ -348,10 +342,8 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	if (police->tcfp_result)
 		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
 			&police->tcfp_result);
-#ifdef CONFIG_NET_ESTIMATOR
 	if (police->tcfp_ewma_rate)
 		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
-#endif
 	return skb->len;
 
 rtattr_failure:
@@ -477,14 +469,12 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
 			goto failure;
 		police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
 	}
-#ifdef CONFIG_NET_ESTIMATOR
 	if (tb[TCA_POLICE_AVRATE-1]) {
 		if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
 			goto failure;
 		police->tcfp_ewma_rate =
 			*(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
 	}
-#endif
 	police->tcfp_toks = police->tcfp_burst = parm->burst;
 	police->tcfp_mtu = parm->mtu;
 	if (police->tcfp_mtu == 0) {
@@ -498,11 +488,9 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
 	police->tcf_index = parm->index ? parm->index :
 		tcf_police_new_index();
 	police->tcf_action = parm->action;
-#ifdef CONFIG_NET_ESTIMATOR
 	if (est)
 		gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est,
 				  police->tcf_stats_lock, est);
-#endif
 	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
 	write_lock_bh(&police_lock);
 	police->tcf_next = tcf_police_ht[h];
@@ -528,14 +516,12 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police)
 	police->tcf_bstats.bytes += skb->len;
 	police->tcf_bstats.packets++;
 
-#ifdef CONFIG_NET_ESTIMATOR
 	if (police->tcfp_ewma_rate &&
 	    police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
 		police->tcf_qstats.overlimits++;
 		spin_unlock(&police->tcf_lock);
 		return police->tcf_action;
 	}
-#endif
 	if (skb->len <= police->tcfp_mtu) {
 		if (police->tcfp_R_tab == NULL) {
 			spin_unlock(&police->tcf_lock);
@@ -591,10 +577,8 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
 	if (police->tcfp_result)
 		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
 			&police->tcfp_result);
-#ifdef CONFIG_NET_ESTIMATOR
 	if (police->tcfp_ewma_rate)
 		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
-#endif
 	return skb->len;
 
 rtattr_failure:
@@ -612,9 +596,7 @@ int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police)
 		goto errout;
 
 	if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 ||
-#ifdef CONFIG_NET_ESTIMATOR
 	    gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 ||
-#endif
 	    gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0)
 		goto errout;
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bec600a..0f9e1c7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -515,7 +515,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 	sch->handle = handle;
 
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
-#ifdef CONFIG_NET_ESTIMATOR
 		if (tca[TCA_RATE-1]) {
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
 						sch->stats_lock,
@@ -531,7 +530,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 				goto err_out3;
 			}
 		}
-#endif
 		qdisc_lock_tree(dev);
 		list_add_tail(&sch->list, &dev->qdisc_list);
 		qdisc_unlock_tree(dev);
@@ -559,11 +557,9 @@ static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
 		if (err)
 			return err;
 	}
-#ifdef CONFIG_NET_ESTIMATOR
 	if (tca[TCA_RATE-1])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
 			sch->stats_lock, tca[TCA_RATE-1]);
-#endif
 	return 0;
 }
 
@@ -839,9 +835,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 		goto rtattr_failure;
 
 	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
-#ifdef CONFIG_NET_ESTIMATOR
 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
-#endif
 	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
 		goto rtattr_failure;
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index ee2d596..bf1ea9e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1653,9 +1653,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 		cl->xstats.undertime = cl->undertime - q->now;
 
 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
-#ifdef CONFIG_NET_ESTIMATOR
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-#endif
 	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
 		return -1;
 
@@ -1726,9 +1724,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 	tcf_destroy_chain(cl->filter_list);
 	qdisc_destroy(cl->q);
 	qdisc_put_rtab(cl->R_tab);
-#ifdef CONFIG_NET_ESTIMATOR
 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
-#endif
 	if (cl != &q->link)
 		kfree(cl);
 }
@@ -1873,11 +1869,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 
 		sch_tree_unlock(sch);
 
-#ifdef CONFIG_NET_ESTIMATOR
 		if (tca[TCA_RATE-1])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
 				cl->stats_lock, tca[TCA_RATE-1]);
-#endif
 		return 0;
 	}
 
@@ -1963,11 +1957,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
 	sch_tree_unlock(sch);
 
-#ifdef CONFIG_NET_ESTIMATOR
 	if (tca[TCA_RATE-1])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
 			cl->stats_lock, tca[TCA_RATE-1]);
-#endif
 
 	*arg = (unsigned long)cl;
 	return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2488dbb..e525fd7 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -514,9 +514,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 		return;
 
 	list_del(&qdisc->list);
-#ifdef CONFIG_NET_ESTIMATOR
 	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
-#endif
 	if (ops->reset)
 		ops->reset(qdisc);
 	if (ops->destroy)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 9d124c4..7ccdf63 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1054,11 +1054,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		}
 		sch_tree_unlock(sch);
 
-#ifdef CONFIG_NET_ESTIMATOR
 		if (tca[TCA_RATE-1])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
 				cl->stats_lock, tca[TCA_RATE-1]);
-#endif
 		return 0;
 	}
 
@@ -1112,11 +1110,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	cl->cl_pcvtoff = parent->cl_cvtoff;
 	sch_tree_unlock(sch);
 
-#ifdef CONFIG_NET_ESTIMATOR
 	if (tca[TCA_RATE-1])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
 			cl->stats_lock, tca[TCA_RATE-1]);
-#endif
 	*arg = (unsigned long)cl;
 	return 0;
 }
@@ -1128,9 +1124,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
 
 	tcf_destroy_chain(cl->filter_list);
 	qdisc_destroy(cl->qdisc);
-#ifdef CONFIG_NET_ESTIMATOR
 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
-#endif
 	if (cl != &q->root)
 		kfree(cl);
 }
@@ -1384,9 +1378,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	xstats.rtwork  = cl->cl_cumul;
 
 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
-#ifdef CONFIG_NET_ESTIMATOR
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-#endif
 	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
 		return -1;
 
-- 
cgit v0.10.2


From 4bdf39911e7a887c4499161422423cbaf16684e8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 2 Jul 2007 22:47:37 -0700
Subject: [NET_SCHED]: Remove unnecessary stats_lock pointers

Remove stats_lock pointers from qdisc-internal structures; in all cases
they point to dev->queue_lock. The only case where the pointer is necessary
is for top-level qdiscs, where it might also point to dev->ingress_lock in
case of the ingress qdisc. Also remove it from actions completely, since it
always points to the action's internal lock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 8b06c2f..2f0273f 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -19,7 +19,6 @@ struct tcf_common {
 	struct gnet_stats_basic		tcfc_bstats;
 	struct gnet_stats_queue		tcfc_qstats;
 	struct gnet_stats_rate_est	tcfc_rate_est;
-	spinlock_t			*tcfc_stats_lock;
 	spinlock_t			tcfc_lock;
 };
 #define tcf_next	common.tcfc_next
@@ -32,7 +31,6 @@ struct tcf_common {
 #define tcf_bstats	common.tcfc_bstats
 #define tcf_qstats	common.tcfc_qstats
 #define tcf_rate_est	common.tcfc_rate_est
-#define tcf_stats_lock	common.tcfc_stats_lock
 #define tcf_lock	common.tcfc_lock
 
 struct tcf_police {
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 72bb9bd..32cc191 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -230,13 +230,12 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti
 		p->tcfc_bindcnt = 1;
 
 	spin_lock_init(&p->tcfc_lock);
-	p->tcfc_stats_lock = &p->tcfc_lock;
 	p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
 	p->tcfc_tm.install = jiffies;
 	p->tcfc_tm.lastuse = jiffies;
 	if (est)
 		gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
-				  p->tcfc_stats_lock, est);
+				  &p->tcfc_lock, est);
 	a->priv = (void *) p;
 	return p;
 }
@@ -595,12 +594,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
 	if (compat_mode) {
 		if (a->type == TCA_OLD_COMPAT)
 			err = gnet_stats_start_copy_compat(skb, 0,
-				TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d);
+				TCA_STATS, TCA_XSTATS, &h->tcf_lock, &d);
 		else
 			return 0;
 	} else
 		err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
-			h->tcf_stats_lock, &d);
+					    &h->tcf_lock, &d);
 
 	if (err < 0)
 		goto errout;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 580698d..3e8716d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -183,7 +183,6 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
 	ret = ACT_P_CREATED;
 	police->tcf_refcnt = 1;
 	spin_lock_init(&police->tcf_lock);
-	police->tcf_stats_lock = &police->tcf_lock;
 	if (bind)
 		police->tcf_bindcnt = 1;
 override:
@@ -231,7 +230,7 @@ override:
 	if (est)
 		gen_replace_estimator(&police->tcf_bstats,
 				      &police->tcf_rate_est,
-				      police->tcf_stats_lock, est);
+				      &police->tcf_lock, est);
 
 	spin_unlock_bh(&police->tcf_lock);
 	if (ret != ACT_P_CREATED)
@@ -450,7 +449,6 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
 
 	police->tcf_refcnt = 1;
 	spin_lock_init(&police->tcf_lock);
-	police->tcf_stats_lock = &police->tcf_lock;
 	if (parm->rate.rate) {
 		police->tcfp_R_tab =
 			qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
@@ -490,7 +488,7 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
 	police->tcf_action = parm->action;
 	if (est)
 		gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est,
-				  police->tcf_stats_lock, est);
+				  &police->tcf_lock, est);
 	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
 	write_lock_bh(&police_lock);
 	police->tcf_next = tcf_police_ht[h];
@@ -591,7 +589,7 @@ int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police)
 	struct gnet_dump d;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-					 TCA_XSTATS, police->tcf_stats_lock,
+					 TCA_XSTATS, &police->tcf_lock,
 					 &d) < 0)
 		goto errout;
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index d1c383f..16fe802 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -71,7 +71,6 @@ struct atm_flow_data {
 	int			ref;		/* reference count */
 	struct gnet_stats_basic	bstats;
 	struct gnet_stats_queue	qstats;
-	spinlock_t		*stats_lock;
 	struct atm_flow_data	*next;
 	struct atm_flow_data	*excess;	/* flow for excess traffic;
 						   NULL to set CLP instead */
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index bf1ea9e..b093d8f 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -148,7 +148,6 @@ struct cbq_class
 	struct gnet_stats_basic bstats;
 	struct gnet_stats_queue qstats;
 	struct gnet_stats_rate_est rate_est;
-	spinlock_t		*stats_lock;
 	struct tc_cbq_xstats	xstats;
 
 	struct tcf_proto	*filter_list;
@@ -1442,7 +1441,6 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
 	q->link.ewma_log = TC_CBQ_DEF_EWMA;
 	q->link.avpkt = q->link.allot/2;
 	q->link.minidle = -0x7FFFFFFF;
-	q->link.stats_lock = &sch->dev->queue_lock;
 
 	qdisc_watchdog_init(&q->watchdog, sch);
 	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
@@ -1871,7 +1869,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 
 		if (tca[TCA_RATE-1])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-				cl->stats_lock, tca[TCA_RATE-1]);
+					      &sch->dev->queue_lock,
+					      tca[TCA_RATE-1]);
 		return 0;
 	}
 
@@ -1929,7 +1928,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 	cl->allot = parent->allot;
 	cl->quantum = cl->allot;
 	cl->weight = cl->R_tab->rate.rate;
-	cl->stats_lock = &sch->dev->queue_lock;
 
 	sch_tree_lock(sch);
 	cbq_link_class(cl);
@@ -1959,7 +1957,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 
 	if (tca[TCA_RATE-1])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-			cl->stats_lock, tca[TCA_RATE-1]);
+				  &sch->dev->queue_lock, tca[TCA_RATE-1]);
 
 	*arg = (unsigned long)cl;
 	return 0;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 7ccdf63..7130a24 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -122,7 +122,6 @@ struct hfsc_class
 	struct gnet_stats_basic bstats;
 	struct gnet_stats_queue qstats;
 	struct gnet_stats_rate_est rate_est;
-	spinlock_t	*stats_lock;
 	unsigned int	level;		/* class level in hierarchy */
 	struct tcf_proto *filter_list;	/* filter list */
 	unsigned int	filter_cnt;	/* filter count */
@@ -1056,7 +1055,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 		if (tca[TCA_RATE-1])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-				cl->stats_lock, tca[TCA_RATE-1]);
+					      &sch->dev->queue_lock,
+					      tca[TCA_RATE-1]);
 		return 0;
 	}
 
@@ -1096,7 +1096,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
 	if (cl->qdisc == NULL)
 		cl->qdisc = &noop_qdisc;
-	cl->stats_lock = &sch->dev->queue_lock;
 	INIT_LIST_HEAD(&cl->children);
 	cl->vt_tree = RB_ROOT;
 	cl->cf_tree = RB_ROOT;
@@ -1112,7 +1111,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE-1])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-			cl->stats_lock, tca[TCA_RATE-1]);
+				  &sch->dev->queue_lock, tca[TCA_RATE-1]);
 	*arg = (unsigned long)cl;
 	return 0;
 }
@@ -1440,8 +1439,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
 		return -EINVAL;
 	qopt = RTA_DATA(opt);
 
-	sch->stats_lock = &sch->dev->queue_lock;
-
 	q->defcls = qopt->defcls;
 	for (i = 0; i < HFSC_HSIZE; i++)
 		INIT_LIST_HEAD(&q->clhash[i]);
@@ -1456,7 +1453,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
 					  sch->handle);
 	if (q->root.qdisc == NULL)
 		q->root.qdisc = &noop_qdisc;
-	q->root.stats_lock = &sch->dev->queue_lock;
 	INIT_LIST_HEAD(&q->root.children);
 	q->root.vt_tree = RB_ROOT;
 	q->root.cf_tree = RB_ROOT;
-- 
cgit v0.10.2


From ee39e10c27ca5293c72addb95bff864095e19904 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 2 Jul 2007 22:48:13 -0700
Subject: [NET_SCHED]: sch_htb: use generic estimator

Use the generic estimator instead of reimplementing (parts of) it.
For compatibility always create a default estimator for new classes.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 035788c..26f81b8 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -69,8 +69,6 @@
 */
 
 #define HTB_HSIZE 16		/* classid hash size */
-#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#define HTB_RATECM 1		/* whether to use rate computer */
 #define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
 #define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
 
@@ -95,12 +93,6 @@ struct htb_class {
 	struct tc_htb_xstats xstats;	/* our special stats */
 	int refcnt;		/* usage count of this class */
 
-#ifdef HTB_RATECM
-	/* rate measurement counters */
-	unsigned long rate_bytes, sum_bytes;
-	unsigned long rate_packets, sum_packets;
-#endif
-
 	/* topology */
 	int level;		/* our level (see above) */
 	struct htb_class *parent;	/* parent class */
@@ -194,10 +186,6 @@ struct htb_sched {
 	int rate2quantum;	/* quant = rate / rate2quantum */
 	psched_time_t now;	/* cached dequeue time */
 	struct qdisc_watchdog watchdog;
-#ifdef HTB_RATECM
-	struct timer_list rttim;	/* rate computer timer */
-	int recmp_bucket;	/* which hash bucket to recompute next */
-#endif
 
 	/* non shaped skbs; let them go directly thru */
 	struct sk_buff_head direct_queue;
@@ -677,34 +665,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_SUCCESS;
 }
 
-#ifdef HTB_RATECM
-#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
-static void htb_rate_timer(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc *)arg;
-	struct htb_sched *q = qdisc_priv(sch);
-	struct hlist_node *p;
-	struct htb_class *cl;
-
-
-	/* lock queue so that we can muck with it */
-	spin_lock_bh(&sch->dev->queue_lock);
-
-	q->rttim.expires = jiffies + HZ;
-	add_timer(&q->rttim);
-
-	/* scan and recompute one bucket at time */
-	if (++q->recmp_bucket >= HTB_HSIZE)
-		q->recmp_bucket = 0;
-
-	hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
-		RT_GEN(cl->sum_bytes, cl->rate_bytes);
-		RT_GEN(cl->sum_packets, cl->rate_packets);
-	}
-	spin_unlock_bh(&sch->dev->queue_lock);
-}
-#endif
-
 /**
  * htb_charge_class - charges amount "bytes" to leaf and ancestors
  *
@@ -750,11 +710,6 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 			if (cl->cmode != HTB_CAN_SEND)
 				htb_add_to_wait_tree(q, cl, diff);
 		}
-#ifdef HTB_RATECM
-		/* update rate counters */
-		cl->sum_bytes += bytes;
-		cl->sum_packets++;
-#endif
 
 		/* update byte stats except for leaves which are already updated */
 		if (cl->level) {
@@ -1095,13 +1050,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
 		q->direct_qlen = 2;
 
-#ifdef HTB_RATECM
-	init_timer(&q->rttim);
-	q->rttim.function = htb_rate_timer;
-	q->rttim.data = (unsigned long)sch;
-	q->rttim.expires = jiffies + HZ;
-	add_timer(&q->rttim);
-#endif
 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
 		q->rate2quantum = 1;
 	q->defcls = gopt->defcls;
@@ -1175,11 +1123,6 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
 
-#ifdef HTB_RATECM
-	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
-	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
-#endif
-
 	if (!cl->level && cl->un.leaf.q)
 		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
 	cl->xstats.tokens = cl->tokens;
@@ -1277,6 +1220,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 		BUG_TRAP(cl->un.leaf.q);
 		qdisc_destroy(cl->un.leaf.q);
 	}
+	gen_kill_estimator(&cl->bstats, &cl->rate_est);
 	qdisc_put_rtab(cl->rate);
 	qdisc_put_rtab(cl->ceil);
 
@@ -1305,9 +1249,6 @@ static void htb_destroy(struct Qdisc *sch)
 	struct htb_sched *q = qdisc_priv(sch);
 
 	qdisc_watchdog_cancel(&q->watchdog);
-#ifdef HTB_RATECM
-	del_timer_sync(&q->rttim);
-#endif
 	/* This line used to be after htb_destroy_class call below
 	   and surprisingly it worked in 2.4. But it must precede it
 	   because filter need its target class alive to be able to call
@@ -1403,6 +1344,20 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	if (!cl) {		/* new class */
 		struct Qdisc *new_q;
 		int prio;
+		struct {
+			struct rtattr		rta;
+			struct gnet_estimator	opt;
+		} est = {
+			.rta = {
+				.rta_len	= RTA_LENGTH(sizeof(est.opt)),
+				.rta_type	= TCA_RATE,
+			},
+			.opt = {
+				/* 4s interval, 16s averaging constant */
+				.interval	= 2,
+				.ewma_log	= 2,
+			},
+		};
 
 		/* check for valid classid */
 		if (!classid || TC_H_MAJ(classid ^ sch->handle)
@@ -1418,6 +1373,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
 			goto failure;
 
+		gen_new_estimator(&cl->bstats, &cl->rate_est,
+				  &sch->dev->queue_lock,
+				  tca[TCA_RATE-1] ? : &est.rta);
 		cl->refcnt = 1;
 		INIT_LIST_HEAD(&cl->sibling);
 		INIT_HLIST_NODE(&cl->hlist);
@@ -1469,8 +1427,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
 		list_add_tail(&cl->sibling,
 			      parent ? &parent->children : &q->root);
-	} else
+	} else {
+		if (tca[TCA_RATE-1])
+			gen_replace_estimator(&cl->bstats, &cl->rate_est,
+					      &sch->dev->queue_lock,
+					      tca[TCA_RATE-1]);
 		sch_tree_lock(sch);
+	}
 
 	/* it used to be a nasty bug here, we have to check that node
 	   is really leaf before changing cl->un.leaf ! */
-- 
cgit v0.10.2


From 0ba48053831d5b89ee2afaefaae1c06eae80cb05 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 2 Jul 2007 22:49:07 -0700
Subject: [NET_SCHED]: Remove unnecessary includes

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 32cc191..feef366 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -11,23 +11,13 @@
  *
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
-#include <net/sock.h>
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 #include <net/netlink.h>
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 7517f37..a9631e4 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -10,26 +10,15 @@
  *
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/proc_fs.h>
 #include <net/netlink.h>
-#include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_gact.h>
 #include <net/tc_act/tc_gact.h>
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 00b05f4..6b407ec 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -11,27 +11,15 @@
  * Copyright:	Jamal Hadi Salim (2002-4)
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <linux/kmod.h>
 #include <net/netlink.h>
-#include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_ipt.h>
 #include <net/tc_act/tc_ipt.h>
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index de21c92..5795789 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -12,31 +12,19 @@
  *
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/proc_fs.h>
 #include <net/netlink.h>
-#include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_mirred.h>
 
-#include <linux/etherdevice.h>
 #include <linux/if_arp.h>
 
 #define MIRRED_TAB_MASK     7
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 6f8684b..b46fab5 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -9,26 +9,15 @@
  * Authors:	Jamal Hadi Salim (2002-4)
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/proc_fs.h>
 #include <net/netlink.h>
-#include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_pedit.h>
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 3e8716d..d204038 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -10,25 +10,15 @@
  * 		J Hadi Salim (action changes)
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/module.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
-#include <net/sock.h>
 #include <net/act_api.h>
 #include <net/netlink.h>
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 36e1eda..fb84ef3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -13,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <net/netlink.h>
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ebf94ed..36b72aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -14,26 +14,16 @@
  *
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/netlink.h>
 #include <net/netlink.h>
-#include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index c885412..8dbcf27 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -13,7 +13,6 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
 #include <linux/errno.h>
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index bbec4a0..8adbd6a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -19,29 +19,12 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <linux/netfilter.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
+#include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index cc941d0..0a8409c 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -10,28 +10,14 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index 0a683c0..cbb5e0d 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -10,27 +10,12 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
+#include <net/ip.h>
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index 93b6abe..dd08aea 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -10,28 +10,12 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
 #include <linux/ipv6.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 #include <net/netlink.h>
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 47ac0c5..2314820 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -9,12 +9,9 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <net/ip.h>
 #include <net/act_api.h>
 #include <net/netlink.h>
 #include <net/pkt_cls.h>
-#include <net/route.h>
 
 
 /*
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index c7a347b..77961e2 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -30,30 +30,14 @@
  *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
 #include <linux/rtnetlink.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
+#include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 63146d3..2483739 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -84,9 +84,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/mm.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <net/pkt_cls.h>
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0f9e1c7..d92ea26 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -19,30 +19,18 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/kmod.h>
 #include <linux/list.h>
-#include <linux/bitops.h>
 #include <linux/hrtimer.h>
 
 #include <net/netlink.h>
-#include <net/sock.h>
 #include <net/pkt_sched.h>
 
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
 			struct Qdisc *old, struct Qdisc *new);
 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 16fe802..54b92d2 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -8,15 +8,12 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/skbuff.h>
-#include <linux/interrupt.h>
 #include <linux/atmdev.h>
 #include <linux/atmclip.h>
-#include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/file.h> /* for fput */
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-#include <net/sock.h>
 
 
 extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index cb0c456..f914fc4 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index b093d8f..b184c35 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -11,28 +11,12 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 3c6fd18..4d2c233 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -9,7 +9,6 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/skbuff.h>
-#include <linux/netdevice.h> /* for pkt_sched */
 #include <linux/rtnetlink.h>
 #include <net/pkt_sched.h>
 #include <net/dsfield.h>
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index c2689f4..c264308 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -13,7 +13,6 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e525fd7..c81649c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -11,27 +11,19 @@
  *              - Ingress support
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
-#include <net/sock.h>
 #include <net/pkt_sched.h>
 
 /* Main transmission queue. */
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index fa1b4fe..3cc6dda 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
 #include <net/red.h>
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 7130a24..874452c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -53,7 +53,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/jiffies.h>
 #include <linux/compiler.h>
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
@@ -62,13 +61,11 @@
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <linux/init.h>
-#include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/pkt_sched.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
-#include <asm/system.h>
 #include <asm/div64.h>
 
 /*
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 26f81b8..c031486 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -28,32 +28,16 @@
  * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
  */
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
 #include <linux/list.h>
 #include <linux/compiler.h>
+#include <linux/rbtree.h>
 #include <net/netlink.h>
-#include <net/sock.h>
 #include <net/pkt_sched.h>
-#include <linux/rbtree.h>
 
 /* HTB algorithm.
     Author: devik@cdi.cz
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index f8b9f1c..cd0aab6 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -9,21 +9,14 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/list.h>
 #include <linux/skbuff.h>
-#include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter.h>
-#include <linux/smp.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-#include <asm/byteorder.h>
-#include <asm/uaccess.h>
-#include <linux/kmod.h>
-#include <linux/stat.h>
-#include <linux/interrupt.h>
-#include <linux/list.h>
 
 
 #undef DEBUG_INGRESS
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5d9d8bc..9e5e87e 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -14,11 +14,9 @@
  */
 
 #include <linux/module.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 4045220..2d8c084 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -12,28 +12,12 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/netlink.h>
-#include <net/sock.h>
 #include <net/pkt_sched.h>
 
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 00db53e..9b95fef 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 96dfdf7..9579573 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -10,31 +10,17 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/jiffies.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
 #include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
 #include <linux/init.h>
-#include <net/ip.h>
-#include <net/netlink.h>
 #include <linux/ipv6.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
+#include <net/ip.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 5386295..22e431d 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -13,29 +13,12 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/jiffies.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index dfe7e45..0968184 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -9,30 +9,17 @@
  */
 
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
 #include <linux/if_arp.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
 #include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
 #include <linux/init.h>
-#include <net/ip.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
 #include <linux/moduleparam.h>
-#include <net/sock.h>
+#include <net/dst.h>
+#include <net/neighbour.h>
 #include <net/pkt_sched.h>
 
 /*
@@ -225,7 +212,6 @@ static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
 	return 0;
 }
 
-/* "teql*" netdevice routines */
 
 static int
 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
-- 
cgit v0.10.2


From 8c644623fe7e41f59fe97cdf666cba3cb7ced7d8 Mon Sep 17 00:00:00 2001
From: Guido Guenther <agx@sigxcpu.org>
Date: Mon, 2 Jul 2007 22:50:25 -0700
Subject: [NET]: Allow group ownership of TUN/TAP devices.

Introduce a new ioctl TUNSETGROUP for group ownership setting of tap
devices. The user now is allowed to send packets if either his euid or
his egid matches the one specified via tunctl (via -u or -g
respectively). If both gid and uid are set via tunctl, both have to
match.

Signed-off-by: Guido Guenther <agx@sigxcpu.org>
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a2c6caa..62b2b30 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -432,6 +432,7 @@ static void tun_setup(struct net_device *dev)
 	init_waitqueue_head(&tun->read_wait);
 
 	tun->owner = -1;
+	tun->group = -1;
 
 	SET_MODULE_OWNER(dev);
 	dev->open = tun_net_open;
@@ -467,8 +468,11 @@ static int tun_set_iff(struct file *file, struct ifreq *ifr)
 			return -EBUSY;
 
 		/* Check permissions */
-		if (tun->owner != -1 &&
-		    current->euid != tun->owner && !capable(CAP_NET_ADMIN))
+		if (((tun->owner != -1 &&
+		      current->euid != tun->owner) ||
+		     (tun->group != -1 &&
+		      current->egid != tun->group)) &&
+		     !capable(CAP_NET_ADMIN))
 			return -EPERM;
 	}
 	else if (__dev_get_by_name(ifr->ifr_name))
@@ -610,6 +614,13 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner);
 		break;
 
+	case TUNSETGROUP:
+		/* Set group of the device */
+		tun->group= (gid_t) arg;
+
+		DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group);
+		break;
+
 	case TUNSETLINK:
 		/* Only allow setting the type when the interface is down */
 		if (tun->dev->flags & IFF_UP) {
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 88aef7b..42eb694 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -36,6 +36,7 @@ struct tun_struct {
 	unsigned long 		flags;
 	int			attached;
 	uid_t			owner;
+	gid_t			group;
 
 	wait_queue_head_t	read_wait;
 	struct sk_buff_head	readq;
@@ -78,6 +79,7 @@ struct tun_struct {
 #define TUNSETPERSIST _IOW('T', 203, int) 
 #define TUNSETOWNER   _IOW('T', 204, int)
 #define TUNSETLINK    _IOW('T', 205, int)
+#define TUNSETGROUP   _IOW('T', 206, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
cgit v0.10.2


From 89da1ecf5483e6aa29b456a15ad6d05a6797c5a5 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Mon, 2 Jul 2007 22:54:18 -0700
Subject: [IrDA]: Netlink layer.

First IrDA configuration netlink layer implementation.
Currently, we only support the set/get mode commands.

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/irda.h b/include/linux/irda.h
index 945ba31..35911bd 100644
--- a/include/linux/irda.h
+++ b/include/linux/irda.h
@@ -216,6 +216,33 @@ struct if_irda_req {
 #define ifr_dtr       ifr_ifru.ifru_line.dtr
 #define ifr_rts       ifr_ifru.ifru_line.rts
 
+
+/* IrDA netlink definitions */
+#define IRDA_NL_NAME "irda"
+#define IRDA_NL_VERSION 1
+
+enum irda_nl_commands {
+	IRDA_NL_CMD_UNSPEC,
+	IRDA_NL_CMD_SET_MODE,	/* set the IrLAP mode of an interface */
+	IRDA_NL_CMD_GET_MODE,	/* query the IrLAP mode of an interface */
+
+	__IRDA_NL_CMD_AFTER_LAST	/* sentinel, keep last */
+};
+#define IRDA_NL_CMD_MAX (__IRDA_NL_CMD_AFTER_LAST - 1)
+
+enum irda_nl_attrs {
+	IRDA_NL_ATTR_UNSPEC,
+	IRDA_NL_ATTR_IFNAME,	/* interface name, NUL-terminated string */
+	IRDA_NL_ATTR_MODE,	/* u32 mode value */
+
+	__IRDA_NL_ATTR_AFTER_LAST	/* sentinel, keep last */
+};
+#define IRDA_NL_ATTR_MAX (__IRDA_NL_ATTR_AFTER_LAST - 1)
+
+/* IrDA modes */
+#define IRDA_MODE_PRIMARY   0x1
+#define IRDA_MODE_SECONDARY 0x2
+
 #endif /* KERNEL_IRDA_H */
 
 
diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h
index 36bee44..0838755 100644
--- a/include/net/irda/irda.h
+++ b/include/net/irda/irda.h
@@ -125,6 +125,9 @@ extern void irda_sysctl_unregister(void);
 extern int irsock_init(void);
 extern void irsock_cleanup(void);
 
+extern int irda_nl_register(void);
+extern void irda_nl_unregister(void);
+
 extern int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
 			    struct packet_type *ptype,
 			    struct net_device *orig_dev);
diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h
index a3d370e..9d0c78ea 100644
--- a/include/net/irda/irlap.h
+++ b/include/net/irda/irlap.h
@@ -208,6 +208,8 @@ struct irlap_cb {
 	int    xbofs_delay;   /* Nr of XBOF's used to MTT */
 	int    bofs_count;    /* Negotiated extra BOFs */
 	int    next_bofs;     /* Negotiated extra BOFs after next frame */
+
+	int    mode;     /* IrLAP mode (primary, secondary or monitor) */
 };
 
 /* 
diff --git a/net/irda/Makefile b/net/irda/Makefile
index d1366c2..187f6c5 100644
--- a/net/irda/Makefile
+++ b/net/irda/Makefile
@@ -10,6 +10,6 @@ obj-$(CONFIG_IRCOMM) += ircomm/
 irda-y := iriap.o iriap_event.o irlmp.o irlmp_event.o irlmp_frame.o \
           irlap.o irlap_event.o irlap_frame.o timer.o qos.o irqueue.o \
           irttp.o irda_device.o irias_object.o wrapper.o af_irda.o \
-	  discovery.o parameters.o irmod.o
+	  discovery.o parameters.o irnetlink.o irmod.o
 irda-$(CONFIG_PROC_FS) += irproc.o
 irda-$(CONFIG_SYSCTL) += irsysctl.o
diff --git a/net/irda/irmod.c b/net/irda/irmod.c
index c7fad2c..1900937 100644
--- a/net/irda/irmod.c
+++ b/net/irda/irmod.c
@@ -88,16 +88,23 @@ EXPORT_SYMBOL(irda_notify_init);
  */
 static int __init irda_init(void)
 {
+	int ret = 0;
+
 	IRDA_DEBUG(0, "%s()\n", __FUNCTION__);
 
 	/* Lower layer of the stack */
 	irlmp_init();
 	irlap_init();
 
+	/* Driver/dongle support */
+	irda_device_init();
+
 	/* Higher layers of the stack */
 	iriap_init();
 	irttp_init();
-	irsock_init();
+	ret = irsock_init();
+	if (ret < 0)
+		goto out_err_1;
 
 	/* Add IrDA packet type (Start receiving packets) */
 	dev_add_pack(&irda_packet_type);
@@ -107,13 +114,44 @@ static int __init irda_init(void)
 	irda_proc_register();
 #endif
 #ifdef CONFIG_SYSCTL
-	irda_sysctl_register();
+	ret = irda_sysctl_register();
+	if (ret < 0)
+		goto out_err_2;
 #endif
 
-	/* Driver/dongle support */
-	irda_device_init();
+	ret = irda_nl_register();
+	if (ret < 0)
+		goto out_err_3;
 
 	return 0;
+
+ out_err_3:
+#ifdef CONFIG_SYSCTL
+	irda_sysctl_unregister();
+#endif
+ out_err_2:
+#ifdef CONFIG_PROC_FS
+	irda_proc_unregister();
+#endif
+
+	/* Remove IrDA packet type (stop receiving packets) */
+	dev_remove_pack(&irda_packet_type);
+
+	/* Remove higher layers */
+	irsock_cleanup();
+ out_err_1:
+	irttp_cleanup();
+	iriap_cleanup();
+
+	/* Remove lower layers */
+	irda_device_cleanup();
+	irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */
+
+	/* Remove middle layer */
+	irlmp_cleanup();
+
+
+	return ret;
 }
 
 /*
@@ -125,6 +163,8 @@ static int __init irda_init(void)
 static void __exit irda_cleanup(void)
 {
 	/* Remove External APIs */
+	irda_nl_unregister();
+
 #ifdef CONFIG_SYSCTL
 	irda_sysctl_unregister();
 #endif
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
new file mode 100644
index 0000000..db71658
--- /dev/null
+++ b/net/irda/irnetlink.c
@@ -0,0 +1,209 @@
+/*
+ * IrDA netlink layer, for stack configuration.
+ *
+ * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org>
+ *
+ * Partly based on the 802.11 netlink implementation
+ * (see net/wireless/nl80211.c) which is:
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/socket.h>
+#include <linux/irda.h>
+#include <net/sock.h>
+#include <net/irda/irda.h>
+#include <net/irda/irlap.h>
+#include <net/genetlink.h>
+
+/* Generic netlink family through which the IrDA stack is configured */
+static struct genl_family irda_nl_family = {
+	.id = GENL_ID_GENERATE,
+	.name = IRDA_NL_NAME,
+	.hdrsize = 0,
+	.version = IRDA_NL_VERSION,
+	/* Highest attribute number, not the highest command number */
+	.maxattr = IRDA_NL_ATTR_MAX,
+};
+
+/*
+ * Look up the net device named by the IRDA_NL_ATTR_IFNAME attribute.
+ *
+ * Returns NULL when the attribute is absent, otherwise whatever
+ * dev_get_by_name() returns. Callers in this file dev_put() a non-NULL
+ * result when they are done with it.
+ */
+static struct net_device *ifname_to_netdev(struct genl_info *info)
+{
+	char *ifname;
+
+	if (!info->attrs[IRDA_NL_ATTR_IFNAME])
+		return NULL;
+
+	ifname = nla_data(info->attrs[IRDA_NL_ATTR_IFNAME]);
+
+	IRDA_DEBUG(5, "%s(): Looking for %s\n", __FUNCTION__, ifname);
+
+	return dev_get_by_name(ifname);
+}
+
+/*
+ * IRDA_NL_CMD_SET_MODE handler: store the requested mode in the IrLAP
+ * instance attached to the named device.
+ *
+ * Returns 0 on success, -EINVAL if IRDA_NL_ATTR_MODE is missing and
+ * -ENODEV if no device is resolved or it has no IrLAP instance.
+ */
+static int irda_nl_set_mode(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net_device *dev;
+	struct irlap_cb *irlap;
+	u32 mode;
+
+	if (!info->attrs[IRDA_NL_ATTR_MODE])
+		return -EINVAL;
+
+	mode = nla_get_u32(info->attrs[IRDA_NL_ATTR_MODE]);
+
+	IRDA_DEBUG(5, "%s(): Switching to mode: %d\n", __FUNCTION__, mode);
+
+	dev = ifname_to_netdev(info);
+	if (!dev)
+		return -ENODEV;
+
+	/* The IrLAP control block is read from dev->atalk_ptr */
+	irlap = (struct irlap_cb *)dev->atalk_ptr;
+	if (!irlap) {
+		dev_put(dev);
+		return -ENODEV;
+	}
+
+	irlap->mode = mode;
+
+	dev_put(dev);
+
+	return 0;
+}
+
+/*
+ * IRDA_NL_CMD_GET_MODE handler: reply to the requester with the name
+ * and current IrLAP mode of the named device.
+ *
+ * Returns the result of genlmsg_unicast() once the reply is built, or
+ * a negative errno if the device, its IrLAP instance or the reply
+ * message cannot be set up.
+ */
+static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net_device *dev;
+	struct irlap_cb *irlap;
+	struct sk_buff *msg;
+	void *hdr;
+	int ret = -ENOBUFS;
+
+	dev = ifname_to_netdev(info);
+	if (!dev)
+		return -ENODEV;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		dev_put(dev);
+		return -ENOMEM;
+	}
+
+	irlap = (struct irlap_cb *)dev->atalk_ptr;
+	if (!irlap) {
+		ret = -ENODEV;
+		goto err_out;
+	}
+
+	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+			  &irda_nl_family, 0,  IRDA_NL_CMD_GET_MODE);
+	/*
+	 * NOTE(review): genlmsg_put() may report failure as NULL rather
+	 * than an ERR_PTR() depending on the kernel version - confirm;
+	 * both are handled here.
+	 */
+	if (!hdr || IS_ERR(hdr)) {
+		ret = hdr ? PTR_ERR(hdr) : -ENOBUFS;
+		goto err_out;
+	}
+
+	/* A failed put leaves ret at its -ENOBUFS default */
+	if (nla_put_string(msg, IRDA_NL_ATTR_IFNAME, dev->name))
+		goto err_out;
+
+	if (nla_put_u32(msg, IRDA_NL_ATTR_MODE, irlap->mode))
+		goto err_out;
+
+	genlmsg_end(msg, hdr);
+
+	/* Everything needed from the device is in the message by now */
+	dev_put(dev);
+
+	return genlmsg_unicast(msg, info->snd_pid);
+
+ err_out:
+	nlmsg_free(msg);
+	dev_put(dev);
+
+	return ret;
+}
+
+/* Attribute validation policy shared by both IrDA netlink commands */
+static struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = {
+	[IRDA_NL_ATTR_IFNAME] = { .type = NLA_NUL_STRING,
+				  .len = IFNAMSIZ-1 },
+	[IRDA_NL_ATTR_MODE] = { .type = NLA_U32 },
+};
+
+/* Commands supported by the IrDA netlink family */
+static struct genl_ops irda_nl_ops[] = {
+	{
+		.cmd = IRDA_NL_CMD_SET_MODE,
+		.doit = irda_nl_set_mode,
+		.policy = irda_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = IRDA_NL_CMD_GET_MODE,
+		.doit = irda_nl_get_mode,
+		.policy = irda_nl_policy,
+		/* can be retrieved by unprivileged users */
+	},
+};
+
+/*
+ * Register the IrDA generic netlink family and each of its operations.
+ *
+ * Returns 0 on success. If registering an operation fails, the family
+ * is unregistered again and that error is returned.
+ */
+int irda_nl_register(void)
+{
+	int err, i;
+
+	err = genl_register_family(&irda_nl_family);
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(irda_nl_ops); i++) {
+		err = genl_register_ops(&irda_nl_family, &irda_nl_ops[i]);
+		if (err)
+			goto err_out;
+	}
+	return 0;
+ err_out:
+	genl_unregister_family(&irda_nl_family);
+	return err;
+}
+
+/* Unregister the IrDA generic netlink family */
+void irda_nl_unregister(void)
+{
+	genl_unregister_family(&irda_nl_family);
+}
-- 
cgit v0.10.2


From 411725280bd0058ebb83c0e32133b7a94902c3a6 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Mon, 2 Jul 2007 22:55:31 -0700
Subject: [IrDA]: Monitor mode.

Through the IrDA netlink set mode command, we switch to IrDA monitor
mode, where one IrLAP instance receives all the packets on the media,
without ever responding to them.

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/irda.h b/include/linux/irda.h
index 35911bd..8e37357 100644
--- a/include/linux/irda.h
+++ b/include/linux/irda.h
@@ -242,6 +242,7 @@ enum nl80211_attrs {
 /* IrDA modes */
 #define IRDA_MODE_PRIMARY   0x1
 #define IRDA_MODE_SECONDARY 0x2
+#define IRDA_MODE_MONITOR   0x4
 
 #endif /* KERNEL_IRDA_H */
 
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 3013c49..25a3444 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -101,6 +101,13 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
 
 	irlap_insert_info(self, skb);
 
+	if (unlikely(self->mode & IRDA_MODE_MONITOR)) {
+		IRDA_DEBUG(3, "%s(): %s is in monitor mode\n", __FUNCTION__,
+			   self->netdev->name);
+		dev_kfree_skb(skb);
+		return;
+	}
+
 	dev_queue_xmit(skb);
 }
 
-- 
cgit v0.10.2


From 66f5e51ed5a300291b34bf3e2b1e22ac28ca3631 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Mon, 2 Jul 2007 22:56:15 -0700
Subject: [IrDA]: kingsun-sir.c charset fix.

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/irda/kingsun-sir.c b/drivers/net/irda/kingsun-sir.c
index 2174291..bdd5c97 100644
--- a/drivers/net/irda/kingsun-sir.c
+++ b/drivers/net/irda/kingsun-sir.c
@@ -4,7 +4,7 @@
 * Version:       0.1.1
 * Description:   Irda KingSun/DonShine USB Dongle
 * Status:        Experimental
-* Author:        Alex Villac�s Lasso <a_villacis@palosanto.com>
+* Author:        Alex Villacís Lasso <a_villacis@palosanto.com>
 *
 *  	Based on stir4200 and mcs7780 drivers, with (strange?) differences
 *
@@ -652,6 +652,6 @@ static void __exit kingsun_cleanup(void)
 }
 module_exit(kingsun_cleanup);
 
-MODULE_AUTHOR("Alex Villac�s Lasso <a_villacis@palosanto.com>");
+MODULE_AUTHOR("Alex Villacís Lasso <a_villacis@palosanto.com>");
 MODULE_DESCRIPTION("IrDA-USB Dongle Driver for KingSun/DonShine");
 MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 93cce3d3657bfb5d04789afcd5104f8c48700f32 Mon Sep 17 00:00:00 2001
From: "G. Liakhovetski" <gl@dsa-ac.de>
Date: Mon, 2 Jul 2007 22:56:57 -0700
Subject: [IrDA]: tsap init routine factorisation.

This patch extracts common code from irttp_open_tsap() and irttp_dup()
into a new function to 1) avoid code duplication, 2) help avoid
forgetting object initialization in the tsap duplication path in the
future.

Signed-off-by: G. Liakhovetski <gl@dsa-ac.de>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 7069e4a..ce46475 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -369,6 +369,20 @@ static int irttp_param_max_sdu_size(void *instance, irda_param_t *param,
 /* Everything is happily mixed up. Waiting for next clean up - Jean II */
 
 /*
+ * Initialization that has to be done both when a new tsap
+ * instance is allocated and when one is duplicated
+ */
+static void irttp_init_tsap(struct tsap_cb *tsap)
+{
+	/* Set up the three skb queues, then the todo timer and the lock */
+	skb_queue_head_init(&tsap->rx_fragments);
+	skb_queue_head_init(&tsap->tx_queue);
+	skb_queue_head_init(&tsap->rx_queue);
+	init_timer(&tsap->todo_timer);
+	spin_lock_init(&tsap->lock);
+}
+
+/*
  * Function irttp_open_tsap (stsap, notify)
  *
  *    Create TSAP connection endpoint,
@@ -395,10 +409,11 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
 		IRDA_DEBUG(0, "%s(), unable to kmalloc!\n", __FUNCTION__);
 		return NULL;
 	}
-	spin_lock_init(&self->lock);
+
+	/* Initialize internal objects */
+	irttp_init_tsap(self);
 
 	/* Initialise todo timer */
-	init_timer(&self->todo_timer);
 	self->todo_timer.data     = (unsigned long) self;
 	self->todo_timer.function = &irttp_todo_expired;
 
@@ -418,9 +433,6 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
 	self->magic = TTP_TSAP_MAGIC;
 	self->connected = FALSE;
 
-	skb_queue_head_init(&self->rx_queue);
-	skb_queue_head_init(&self->tx_queue);
-	skb_queue_head_init(&self->rx_fragments);
 	/*
 	 *  Create LSAP at IrLMP layer
 	 */
@@ -1455,12 +1467,9 @@ struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance)
 
 	/* Not everything should be copied */
 	new->notify.instance = instance;
-	spin_lock_init(&new->lock);
-	init_timer(&new->todo_timer);
 
-	skb_queue_head_init(&new->rx_queue);
-	skb_queue_head_init(&new->tx_queue);
-	skb_queue_head_init(&new->rx_fragments);
+	/* Initialize internal objects */
+	irttp_init_tsap(new);
 
 	/* This is locked */
 	hashbin_insert(irttp->tsaps, (irda_queue_t *) new, (long) new, NULL);
-- 
cgit v0.10.2


From 067b207b281db5e3f03f8d244286c20f61aa2343 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Thu, 5 Jul 2007 17:08:05 -0700
Subject: [UDP]: Cleanup UDP encapsulation code

This cleanup fell out after adding L2TP support where a new encap_rcv
funcptr was added to struct udp_sock. Have XFRM use the new encap_rcv
funcptr, which allows us to move the XFRM encap code from udp.c into
xfrm4_input.c.

Make xfrm4_rcv_encap() static since it is no longer called externally.

Signed-off-by: James Chapman <jchapman@katalix.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d3a898b..ae959e9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1003,7 +1003,7 @@ extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
 				 u8 **prevhdr);
 
 #ifdef CONFIG_XFRM
-extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type);
+extern int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 extern int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen);
 extern int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family);
 #else
@@ -1012,12 +1012,13 @@ static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optv
  	return -ENOPROTOOPT;
 } 
 
-static inline int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
+static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 {
  	/* should not happen */
  	kfree_skb(skb);
 	return 0;
 }
+
 static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family)
 {
 	return -EINVAL;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b9276f8..4ec4a25 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -920,108 +920,6 @@ int udp_disconnect(struct sock *sk, int flags)
 	return 0;
 }
 
-/* return:
- * 	1  if the UDP system should process it
- *	0  if we should drop this packet
- * 	-1 if it should get processed by xfrm4_rcv_encap
- *	-2 if it should get processed by l2tp
- */
-static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
-{
-	struct udp_sock *up = udp_sk(sk);
-	struct udphdr *uh;
-	struct iphdr *iph;
-	int iphlen, len;
-
-	__u8 *udpdata;
-	__be32 *udpdata32;
-	__u16 encap_type = up->encap_type;
-
-	/* if we're overly short, let UDP handle it */
-	len = skb->len - sizeof(struct udphdr);
-	if (len <= 0)
-		return 1;
-
-	/* if this is not encapsulated socket, then just return now */
-	if (!encap_type)
-		return 1;
-
-	/* If this is a paged skb, make sure we pull up
-	 * whatever data we need to look at. */
-	if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
-		return 1;
-
-	/* Now we can get the pointers */
-	uh = udp_hdr(skb);
-	udpdata = (__u8 *)uh + sizeof(struct udphdr);
-	udpdata32 = (__be32 *)udpdata;
-
-	switch (encap_type) {
-	default:
-	case UDP_ENCAP_ESPINUDP:
-		/* Check if this is a keepalive packet.  If so, eat it. */
-		if (len == 1 && udpdata[0] == 0xff) {
-			return 0;
-		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
-			/* ESP Packet without Non-ESP header */
-			len = sizeof(struct udphdr);
-		} else
-			/* Must be an IKE packet.. pass it through */
-			return 1;
-		break;
-	case UDP_ENCAP_ESPINUDP_NON_IKE:
-		/* Check if this is a keepalive packet.  If so, eat it. */
-		if (len == 1 && udpdata[0] == 0xff) {
-			return 0;
-		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
-			   udpdata32[0] == 0 && udpdata32[1] == 0) {
-
-			/* ESP Packet with Non-IKE marker */
-			len = sizeof(struct udphdr) + 2 * sizeof(u32);
-		} else
-			/* Must be an IKE packet.. pass it through */
-			return 1;
-		break;
-	case UDP_ENCAP_L2TPINUDP:
-		/* Let caller know to send this to l2tp */
-		return -2;
-	}
-
-#ifndef CONFIG_XFRM
-	return 1;
-#else
-	/* At this point we are sure that this is an ESPinUDP packet,
-	 * so we need to remove 'len' bytes from the packet (the UDP
-	 * header and optional ESP marker bytes) and then modify the
-	 * protocol to ESP, and then call into the transform receiver.
-	 */
-	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-		return 0;
-
-	/* Now we can update and verify the packet length... */
-	iph = ip_hdr(skb);
-	iphlen = iph->ihl << 2;
-	iph->tot_len = htons(ntohs(iph->tot_len) - len);
-	if (skb->len < iphlen + len) {
-		/* packet is too small!?! */
-		return 0;
-	}
-
-	/* pull the data buffer up to the ESP header and set the
-	 * transport header to point to ESP.  Keep UDP on the stack
-	 * for later.
-	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
-
-	/* modify the protocol (it's ESP!) */
-	iph->protocol = IPPROTO_ESP;
-
-	/* and let the caller know to send this into the ESP processor... */
-	return -1;
-#endif
-}
-
 /* returns:
  *  -1: error
  *   0: success
@@ -1044,44 +942,36 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 
 	if (up->encap_type) {
 		/*
-		 * This is an encapsulation socket, so let's see if this is
-		 * an encapsulated packet.
-		 * If it's a keepalive packet, then just eat it.
-		 * If it's an encapsulateed packet, then pass it to the
-		 * IPsec xfrm input and return the response
-		 * appropriately.  Otherwise, just fall through and
-		 * pass this up the UDP socket.
+		 * This is an encapsulation socket so pass the skb to
+		 * the socket's encap_rcv() hook. Otherwise, just
+		 * fall through and pass this up the UDP socket.
+		 * up->encap_rcv() returns the following value:
+		 * =0 if skb was successfully passed to the encap
+		 *    handler or was discarded by it.
+		 * >0 if skb should be passed on to UDP.
+		 * <0 if skb should be resubmitted as proto -N
 		 */
-		int ret;
+		unsigned int len;
 
-		ret = udp_encap_rcv(sk, skb);
-		if (ret == 0) {
-			/* Eat the packet .. */
-			kfree_skb(skb);
-			return 0;
-		}
-		if (ret == -1) {
-			/* process the ESP packet */
-			ret = xfrm4_rcv_encap(skb, up->encap_type);
-			UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
-			return -ret;
-		}
-		if (ret == -2) {
-			/* process the L2TP packet */
-			if (up->encap_rcv != NULL) {
-				ret = (*up->encap_rcv)(sk, skb);
-				if (ret <= 0) {
-					UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
-					return ret;
-				}
-
-				/* FALLTHROUGH -- pass up as UDP packet */
+		/* if we're overly short, let UDP handle it */
+		len = skb->len - sizeof(struct udphdr);
+		if (len <= 0)
+			goto udp;
+
+		if (up->encap_rcv != NULL) {
+			int ret;
+
+			ret = (*up->encap_rcv)(sk, skb);
+			if (ret <= 0) {
+				UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
+				return -ret;
 			}
 		}
 
 		/* FALLTHROUGH -- it's a UDP Packet */
 	}
 
+udp:
 	/*
 	 * 	UDP-Lite specific tests, ignored on UDP sockets
 	 */
@@ -1367,6 +1257,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		case 0:
 		case UDP_ENCAP_ESPINUDP:
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
+			up->encap_rcv = xfrm4_udp_encap_rcv;
+			/* FALLTHROUGH */
 		case UDP_ENCAP_L2TPINUDP:
 			up->encap_type = val;
 			break;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index fa1902d..2fa1082 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -16,13 +16,6 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
-int xfrm4_rcv(struct sk_buff *skb)
-{
-	return xfrm4_rcv_encap(skb, 0);
-}
-
-EXPORT_SYMBOL(xfrm4_rcv);
-
 static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 {
 	switch (nexthdr) {
@@ -53,7 +46,7 @@ drop:
 }
 #endif
 
-int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
+static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 {
 	__be32 spi, seq;
 	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
@@ -167,3 +160,108 @@ drop:
 	kfree_skb(skb);
 	return 0;
 }
+
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct udp_sock *up = udp_sk(sk);
+	struct udphdr *uh;
+	struct iphdr *iph;
+	int iphlen, len;
+	int ret;
+
+	__u8 *udpdata;
+	__be32 *udpdata32;
+	__u16 encap_type = up->encap_type;
+
+	/* if this is not encapsulated socket, then just return now */
+	if (!encap_type)
+		return 1;
+
+	/* If this is a paged skb, make sure we pull up
+	 * whatever data we need to look at. */
+	len = skb->len - sizeof(struct udphdr);
+	if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
+		return 1;
+
+	/* Now we can get the pointers */
+	uh = udp_hdr(skb);
+	udpdata = (__u8 *)uh + sizeof(struct udphdr);
+	udpdata32 = (__be32 *)udpdata;
+
+	switch (encap_type) {
+	default:
+	case UDP_ENCAP_ESPINUDP:
+		/* Check if this is a keepalive packet.  If so, eat it. */
+		if (len == 1 && udpdata[0] == 0xff) {
+			goto drop;
+		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
+			/* ESP Packet without Non-ESP header */
+			len = sizeof(struct udphdr);
+		} else
+			/* Must be an IKE packet.. pass it through */
+			return 1;
+		break;
+	case UDP_ENCAP_ESPINUDP_NON_IKE:
+		/* Check if this is a keepalive packet.  If so, eat it. */
+		if (len == 1 && udpdata[0] == 0xff) {
+			goto drop;
+		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
+			   udpdata32[0] == 0 && udpdata32[1] == 0) {
+
+			/* ESP Packet with Non-IKE marker */
+			len = sizeof(struct udphdr) + 2 * sizeof(u32);
+		} else
+			/* Must be an IKE packet.. pass it through */
+			return 1;
+		break;
+	}
+
+	/* At this point we are sure that this is an ESPinUDP packet,
+	 * so we need to remove 'len' bytes from the packet (the UDP
+	 * header and optional ESP marker bytes) and then modify the
+	 * protocol to ESP, and then call into the transform receiver.
+	 */
+	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		goto drop;
+
+	/* Now we can update and verify the packet length... */
+	iph = ip_hdr(skb);
+	iphlen = iph->ihl << 2;
+	iph->tot_len = htons(ntohs(iph->tot_len) - len);
+	if (skb->len < iphlen + len) {
+		/* packet is too small!?! */
+		goto drop;
+	}
+
+	/* pull the data buffer up to the ESP header and set the
+	 * transport header to point to ESP.  Keep UDP on the stack
+	 * for later.
+	 */
+	__skb_pull(skb, len);
+	skb_reset_transport_header(skb);
+
+	/* modify the protocol (it's ESP!) */
+	iph->protocol = IPPROTO_ESP;
+
+	/* process ESP */
+	ret = xfrm4_rcv_encap(skb, encap_type);
+	return ret;
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+int xfrm4_rcv(struct sk_buff *skb)
+{
+	return xfrm4_rcv_encap(skb, 0);
+}
+
+EXPORT_SYMBOL(xfrm4_rcv);
-- 
cgit v0.10.2


From 558585aad0c0ef83d3d14a1c7576b1e404ca1fbc Mon Sep 17 00:00:00 2001
From: Jing Min Zhao <zhaojingmin@vivecode.com>
Date: Sat, 7 Jul 2007 22:13:17 -0700
Subject: [NETFILTER]: nf_conntrack_h323: check range first in sequence
 extension

Check range before checking STOP flag. This optimization may save a
nanosecond or less :)

Signed-off-by: Jing Min Zhao <zhaojingmin@vivecode.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 6b7eaa0..a869403 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -555,15 +555,6 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level)
 
 	/* Decode the extension components */
 	for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
-		if (i < f->ub && son->attr & STOP) {
-			PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
-			      son->name);
-			return H323_ERROR_STOP;
-		}
-
-		if (!((0x80000000 >> opt) & bmp2))	/* Not present */
-			continue;
-
 		/* Check Range */
 		if (i >= f->ub) {	/* Newer Version? */
 			CHECK_BOUND(bs, 2);
@@ -573,6 +564,15 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level)
 			continue;
 		}
 
+		if (son->attr & STOP) {
+			PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
+			      son->name);
+			return H323_ERROR_STOP;
+		}
+
+		if (!((0x80000000 >> opt) & bmp2))	/* Not present */
+			continue;
+
 		CHECK_BOUND(bs, 2);
 		len = get_len(bs);
 		CHECK_BOUND(bs, len);
-- 
cgit v0.10.2


From 7bfe24611671ec76b44281e582b38535e21f01a9 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki@netfilter.org>
Date: Sat, 7 Jul 2007 22:14:23 -0700
Subject: [NETFILTER]: ip6_tables: fix explanation of valid upper protocol
 number

This explains the allowed upper protocol numbers. IP6T_F_NOPROTO was
introduced so that 0 could mean the Hop-by-Hop options header rather than a
wildcard, but that seems to have been forgotten: 0 has been used as a wildcard since 2002-08-23.

Signed-off-by: Yasuyuki Kozakai <yasuyuki@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 4686f83..9a720f0 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -44,8 +44,14 @@ struct ip6t_ip6 {
 	char iniface[IFNAMSIZ], outiface[IFNAMSIZ];
 	unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ];
 
-	/* ARGH, HopByHop uses 0, so can't do 0 = ANY,
-	   instead IP6T_F_NOPROTO must be set */
+	/* Upper protocol number
+	 * - The allowed value is 0 (any) or protocol number of last parsable
+	 *   header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or
+	 *   the non IPv6 extension headers.
+	 * - The protocol numbers of IPv6 extension headers other than ESP and
+	 *   MH do not match any packets.
+	 * - You also need to set IP6T_FLAGS_PROTO in "flags" to check protocol.
+	 */
 	u_int16_t proto;
 	/* TOS to match iff flags & IP6T_F_TOS */
 	u_int8_t tos;
-- 
cgit v0.10.2


From cff533ac12494fa002e2c46acc94d670e5f636a2 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:15:12 -0700
Subject: [NETFILTER]: x_tables: switch hotdrop to bool

Switch the "hotdrop" variables to boolean

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 7e733a6..b8577d1 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -148,7 +148,7 @@ struct xt_match
 		     const void *matchinfo,
 		     int offset,
 		     unsigned int protoff,
-		     int *hotdrop);
+		     bool *hotdrop);
 
 	/* Called when user tries to insert an entry of this type. */
 	/* Should return true or false. */
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index cae4121..1d75a5c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -224,7 +224,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	static const char nulldevname[IFNAMSIZ];
 	unsigned int verdict = NF_DROP;
 	struct arphdr *arp;
-	int hotdrop = 0;
+	bool hotdrop = false;
 	struct arpt_entry *e, *back;
 	const char *indev, *outdev;
 	void *table_base;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 9bacf1a0..e2a8938 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -188,7 +188,7 @@ int do_match(struct ipt_entry_match *m,
 	     const struct net_device *in,
 	     const struct net_device *out,
 	     int offset,
-	     int *hotdrop)
+	     bool *hotdrop)
 {
 	/* Stop iteration if it doesn't match */
 	if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
@@ -216,7 +216,7 @@ ipt_do_table(struct sk_buff **pskb,
 	u_int16_t offset;
 	struct iphdr *ip;
 	u_int16_t datalen;
-	int hotdrop = 0;
+	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
@@ -2122,7 +2122,7 @@ icmp_match(const struct sk_buff *skb,
 	   const void *matchinfo,
 	   int offset,
 	   unsigned int protoff,
-	   int *hotdrop)
+	   bool *hotdrop)
 {
 	struct icmphdr _icmph, *ic;
 	const struct ipt_icmp *icmpinfo = matchinfo;
@@ -2137,7 +2137,7 @@ icmp_match(const struct sk_buff *skb,
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index a652a14..a9a9b75 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -30,7 +30,7 @@ static inline int match_type(__be32 addr, u_int16_t mask)
 static int match(const struct sk_buff *skb,
 		 const struct net_device *in, const struct net_device *out,
 		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, int *hotdrop)
+		 int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_addrtype_info *info = matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 18a1678..9a244e4 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -44,7 +44,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	struct ip_auth_hdr _ahdr, *ah;
 	const struct ipt_ah *ahinfo = matchinfo;
@@ -60,7 +60,7 @@ match(const struct sk_buff *skb,
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil AH tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 2621812..a47f374 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -30,7 +30,7 @@ static inline int match_ip(const struct sk_buff *skb,
 
 static inline int match_tcp(const struct sk_buff *skb,
 			    const struct ipt_ecn_info *einfo,
-			    int *hotdrop)
+			    bool *hotdrop)
 {
 	struct tcphdr _tcph, *th;
 
@@ -39,7 +39,7 @@ static inline int match_tcp(const struct sk_buff *skb,
 	 */
 	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL) {
-		*hotdrop = 0;
+		*hotdrop = false;
 		return 0;
 	}
 
@@ -69,7 +69,7 @@ static inline int match_tcp(const struct sk_buff *skb,
 static int match(const struct sk_buff *skb,
 		 const struct net_device *in, const struct net_device *out,
 		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, int *hotdrop)
+		 int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index 33af9e9..86f225c 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -29,7 +29,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const struct xt_match *match,
       const void *matchinfo,
-      int offset, unsigned int protoff, int *hotdrop)
+      int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_iprange_info *info = matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 7fae9aa..92be562 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -29,7 +29,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct ipt_owner_info *info = matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 15a9e8b..81f1a01 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -173,7 +173,7 @@ static int
 ipt_recent_match(const struct sk_buff *skb,
 		 const struct net_device *in, const struct net_device *out,
 		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, int *hotdrop)
+		 int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
@@ -201,7 +201,7 @@ ipt_recent_match(const struct sk_buff *skb,
 			goto out;
 		e = recent_entry_init(t, addr, ttl);
 		if (e == NULL)
-			*hotdrop = 1;
+			*hotdrop = true;
 		ret ^= 1;
 		goto out;
 	}
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index d314844..803ed4c 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -26,7 +26,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct ipt_tos_info *info = matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index ab02d9e..e7316b2 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
 static int match(const struct sk_buff *skb,
 		 const struct net_device *in, const struct net_device *out,
 		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, int *hotdrop)
+		 int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_ttl_info *info = matchinfo;
 	const u8 ttl = ip_hdr(skb)->ttl;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 9aa6240..13c66a7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -102,7 +102,7 @@ ip6_packet_match(const struct sk_buff *skb,
 		 const char *outdev,
 		 const struct ip6t_ip6 *ip6info,
 		 unsigned int *protoff,
-		 int *fragoff, int *hotdrop)
+		 int *fragoff, bool *hotdrop)
 {
 	size_t i;
 	unsigned long ret;
@@ -162,7 +162,7 @@ ip6_packet_match(const struct sk_buff *skb,
 		protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
 		if (protohdr < 0) {
 			if (_frag_off == 0)
-				*hotdrop = 1;
+				*hotdrop = true;
 			return 0;
 		}
 		*fragoff = _frag_off;
@@ -225,7 +225,7 @@ int do_match(struct ip6t_entry_match *m,
 	     const struct net_device *out,
 	     int offset,
 	     unsigned int protoff,
-	     int *hotdrop)
+	     bool *hotdrop)
 {
 	/* Stop iteration if it doesn't match */
 	if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
@@ -252,7 +252,7 @@ ip6t_do_table(struct sk_buff **pskb,
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	int offset = 0;
 	unsigned int protoff = 0;
-	int hotdrop = 0;
+	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
@@ -1299,7 +1299,7 @@ icmp6_match(const struct sk_buff *skb,
 	   const void *matchinfo,
 	   int offset,
 	   unsigned int protoff,
-	   int *hotdrop)
+	   bool *hotdrop)
 {
 	struct icmp6hdr _icmp, *ic;
 	const struct ip6t_icmp *icmpinfo = matchinfo;
@@ -1313,7 +1313,7 @@ icmp6_match(const struct sk_buff *skb,
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil ICMP tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index d3c1543..27b7bd2 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -49,7 +49,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	struct ip_auth_hdr *ah, _ah;
 	const struct ip6t_ah *ahinfo = matchinfo;
@@ -60,13 +60,13 @@ match(const struct sk_buff *skb,
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*hotdrop = 1;
+			*hotdrop = true;
 		return 0;
 	}
 
 	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
 	if (ah == NULL) {
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 0f3dd93..69e79e1 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -27,7 +27,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	unsigned char eui64[8];
 	int i = 0;
@@ -35,7 +35,7 @@ match(const struct sk_buff *skb,
 	if (!(skb_mac_header(skb) >= skb->head &&
 	      (skb_mac_header(skb) + ETH_HLEN) <= skb->data) &&
 	    offset != 0) {
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 5a5da71..740fdca 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -48,7 +48,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	struct frag_hdr _frag, *fh;
 	const struct ip6t_frag *fraginfo = matchinfo;
@@ -58,13 +58,13 @@ match(const struct sk_buff *skb,
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*hotdrop = 1;
+			*hotdrop = true;
 		return 0;
 	}
 
 	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
 	if (fh == NULL) {
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index d2373c7..5633de1 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -55,7 +55,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	struct ipv6_opt_hdr _optsh, *oh;
 	const struct ip6t_opts *optinfo = matchinfo;
@@ -71,13 +71,13 @@ match(const struct sk_buff *skb,
 	err = ipv6_find_hdr(skb, &ptr, match->data, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*hotdrop = 1;
+			*hotdrop = true;
 		return 0;
 	}
 
 	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
 	if (oh == NULL) {
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index d606c0e..cbf49cf 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -22,7 +22,7 @@ MODULE_LICENSE("GPL");
 static int match(const struct sk_buff *skb,
 		 const struct net_device *in, const struct net_device *out,
 		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, int *hotdrop)
+		 int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ip6t_hl_info *info = matchinfo;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index fd6a086..469dec2 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -34,7 +34,7 @@ ipv6header_match(const struct sk_buff *skb,
 		 const void *matchinfo,
 		 int offset,
 		 unsigned int protoff,
-		 int *hotdrop)
+		 bool *hotdrop)
 {
 	const struct ip6t_ipv6header_info *info = matchinfo;
 	unsigned int temp;
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index c2a9098..c27647b 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -48,7 +48,7 @@ match(const struct sk_buff *skb,
 	 const void *matchinfo,
 	 int offset,
 	 unsigned int protoff,
-	 int *hotdrop)
+	 bool *hotdrop)
 {
 	struct ip6_mh _mh, *mh;
 	const struct ip6t_mh *mhinfo = matchinfo;
@@ -62,14 +62,14 @@ match(const struct sk_buff *skb,
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil MH tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
 		duprintf("Dropping invalid MH Payload Proto: %u\n",
 			 mh->ip6mh_proto);
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 43738bb..f90f7c3 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -31,7 +31,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct ip6t_owner_info *info = matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 81ab00d..2bb8821 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -50,7 +50,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	struct ipv6_rt_hdr _route, *rh;
 	const struct ip6t_rt *rtinfo = matchinfo;
@@ -64,13 +64,13 @@ match(const struct sk_buff *skb,
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*hotdrop = 1;
+			*hotdrop = true;
 		return 0;
 	}
 
 	rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
 	if (rh == NULL) {
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 7db492d..20690ea 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -23,7 +23,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protooff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	/* We always match */
 	return 1;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 804afe5..8fe5775 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -23,7 +23,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_connbytes_info *sinfo = matchinfo;
 	struct nf_conn *ct;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index e180325..8a6d58a 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -38,7 +38,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_connmark_info *info = matchinfo;
 	struct nf_conn *ct;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 189ded5..915c730 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -27,7 +27,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_conntrack_info *sinfo = matchinfo;
 	struct nf_conn *ct;
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 2c9c0de..3172e73 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -36,7 +36,7 @@ dccp_find_option(u_int8_t option,
 		 const struct sk_buff *skb,
 		 unsigned int protoff,
 		 const struct dccp_hdr *dh,
-		 int *hotdrop)
+		 bool *hotdrop)
 {
 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
 	unsigned char *op;
@@ -45,7 +45,7 @@ dccp_find_option(u_int8_t option,
 	unsigned int i;
 
 	if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) {
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
@@ -57,7 +57,7 @@ dccp_find_option(u_int8_t option,
 	if (op == NULL) {
 		/* If we don't have the whole header, drop packet. */
 		spin_unlock_bh(&dccp_buflock);
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
@@ -86,7 +86,7 @@ match_types(const struct dccp_hdr *dh, u_int16_t typemask)
 
 static inline int
 match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
-	     const struct dccp_hdr *dh, int *hotdrop)
+	     const struct dccp_hdr *dh, bool *hotdrop)
 {
 	return dccp_find_option(option, skb, protoff, dh, hotdrop);
 }
@@ -99,7 +99,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_dccp_info *info = matchinfo;
 	struct dccp_hdr _dh, *dh;
@@ -109,7 +109,7 @@ match(const struct sk_buff *skb,
 
 	dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh);
 	if (dh == NULL) {
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 56b247e..c106d73 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -29,7 +29,7 @@ static int match(const struct sk_buff *skb,
 		 const void *matchinfo,
 		 int offset,
 		 unsigned int protoff,
-		 int *hotdrop)
+		 bool *hotdrop)
 {
 	const struct xt_dscp_info *info = matchinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -44,7 +44,7 @@ static int match6(const struct sk_buff *skb,
 		  const void *matchinfo,
 		  int offset,
 		  unsigned int protoff,
-		  int *hotdrop)
+		  bool *hotdrop)
 {
 	const struct xt_dscp_info *info = matchinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 7c95f14..5d3421b 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -50,7 +50,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	struct ip_esp_hdr _esp, *eh;
 	const struct xt_esp *espinfo = matchinfo;
@@ -65,7 +65,7 @@ match(const struct sk_buff *skb,
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ESP tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index d3043fa..cd5cba6 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -440,7 +440,7 @@ hashlimit_match(const struct sk_buff *skb,
 		const void *matchinfo,
 		int offset,
 		unsigned int protoff,
-		int *hotdrop)
+		bool *hotdrop)
 {
 	struct xt_hashlimit_info *r =
 		((struct xt_hashlimit_info *)matchinfo)->u.master;
@@ -487,7 +487,7 @@ hashlimit_match(const struct sk_buff *skb,
 	return 0;
 
 hotdrop:
-	*hotdrop = 1;
+	*hotdrop = true;
 	return 0;
 }
 
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index c139b2f..0aa0907 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -36,7 +36,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_helper_info *info = matchinfo;
 	struct nf_conn *ct;
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 77288c5..621c9ee 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -28,7 +28,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_length_info *info = matchinfo;
 	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
@@ -44,7 +44,7 @@ match6(const struct sk_buff *skb,
        const void *matchinfo,
        int offset,
        unsigned int protoff,
-       int *hotdrop)
+       bool *hotdrop)
 {
 	const struct xt_length_info *info = matchinfo;
 	const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) +
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 571a72a..1133b4c 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -65,7 +65,7 @@ ipt_limit_match(const struct sk_buff *skb,
 		const void *matchinfo,
 		int offset,
 		unsigned int protoff,
-		int *hotdrop)
+		bool *hotdrop)
 {
 	struct xt_rateinfo *r = ((struct xt_rateinfo *)matchinfo)->master;
 	unsigned long now = jiffies;
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 1d3a1d9..0e6a286 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -32,7 +32,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
     const struct xt_mac_info *info = matchinfo;
 
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 39911dd..944d1ea 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -27,7 +27,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_mark_info *info = matchinfo;
 
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 4dce2a8..1dc53de 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -102,7 +102,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	__be16 _ports[2], *pptr;
 	const struct xt_multiport *multiinfo = matchinfo;
@@ -116,7 +116,7 @@ match(const struct sk_buff *skb,
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
@@ -133,7 +133,7 @@ match_v1(const struct sk_buff *skb,
 	 const void *matchinfo,
 	 int offset,
 	 unsigned int protoff,
-	 int *hotdrop)
+	 bool *hotdrop)
 {
 	__be16 _ports[2], *pptr;
 	const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -147,7 +147,7 @@ match_v1(const struct sk_buff *skb,
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 35a0fe2..a6de512 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -31,7 +31,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	int i;
 	static const char nulldevname[IFNAMSIZ];
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index e1409fc..692581f 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -28,7 +28,7 @@ static int match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	u_int8_t type;
 	const struct xt_pkttype_info *info = matchinfo;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 15b45a9..6878482 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -115,7 +115,7 @@ static int match(const struct sk_buff *skb,
 		 const void *matchinfo,
 		 int offset,
 		 unsigned int protoff,
-		 int *hotdrop)
+		 bool *hotdrop)
 {
 	const struct xt_policy_info *info = matchinfo;
 	int ret;
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index bfdde06..53c71ac 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -20,7 +20,7 @@ static int
 match(const struct sk_buff *skb,
       const struct net_device *in, const struct net_device *out,
       const struct xt_match *match, const void *matchinfo,
-      int offset, unsigned int protoff, int *hotdrop)
+      int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct xt_quota_info *q = ((struct xt_quota_info *)matchinfo)->master;
 	int ret = q->flags & XT_QUOTA_INVERT ? 1 : 0;
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index c2017f8..41451f5 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -29,7 +29,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_realm_info *info = matchinfo;
 	struct dst_entry *dst = skb->dst;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index f86d8d7..e581afe 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -47,7 +47,7 @@ match_packet(const struct sk_buff *skb,
 	     int chunk_match_type,
 	     const struct xt_sctp_flag_info *flag_info,
 	     const int flag_count,
-	     int *hotdrop)
+	     bool *hotdrop)
 {
 	u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
 	sctp_chunkhdr_t _sch, *sch;
@@ -64,7 +64,7 @@ match_packet(const struct sk_buff *skb,
 		sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
 		if (sch == NULL || sch->length == 0) {
 			duprintf("Dropping invalid SCTP packet.\n");
-			*hotdrop = 1;
+			*hotdrop = true;
 			return 0;
 		}
 
@@ -127,7 +127,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_sctp_info *info = matchinfo;
 	sctp_sctphdr_t _sh, *sh;
@@ -140,7 +140,7 @@ match(const struct sk_buff *skb,
 	sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh);
 	if (sh == NULL) {
 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 	duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 149294f..74fe069 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -28,7 +28,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_state_info *sinfo = matchinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 091a9f8..4e5ed81 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -28,7 +28,7 @@ static int
 match(const struct sk_buff *skb,
       const struct net_device *in, const struct net_device *out,
       const struct xt_match *match, const void *matchinfo,
-      int offset, unsigned int protoff, int *hotdrop)
+      int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
 	int ret = info->flags & XT_STATISTIC_INVERT ? 1 : 0;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 999a005..7552d89 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -28,7 +28,7 @@ static int match(const struct sk_buff *skb,
 		 const void *matchinfo,
 		 int offset,
 		 unsigned int protoff,
-		 int *hotdrop)
+		 bool *hotdrop)
 {
 	const struct xt_string_info *conf = matchinfo;
 	struct ts_state state;
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 80571d0..0db4f53 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -31,7 +31,7 @@ match(const struct sk_buff *skb,
       const void *matchinfo,
       int offset,
       unsigned int protoff,
-      int *hotdrop)
+      bool *hotdrop)
 {
 	const struct xt_tcpmss_match_info *info = matchinfo;
 	struct tcphdr _tcph, *th;
@@ -77,7 +77,7 @@ out:
 	return info->invert;
 
 dropit:
-	*hotdrop = 1;
+	*hotdrop = true;
 	return 0;
 }
 
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 46414b5..ca9ccdd 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -42,7 +42,7 @@ tcp_find_option(u_int8_t option,
 		unsigned int protoff,
 		unsigned int optlen,
 		int invert,
-		int *hotdrop)
+		bool *hotdrop)
 {
 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
 	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
@@ -57,7 +57,7 @@ tcp_find_option(u_int8_t option,
 	op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr),
 				optlen, _opt);
 	if (op == NULL) {
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
@@ -78,7 +78,7 @@ tcp_match(const struct sk_buff *skb,
 	  const void *matchinfo,
 	  int offset,
 	  unsigned int protoff,
-	  int *hotdrop)
+	  bool *hotdrop)
 {
 	struct tcphdr _tcph, *th;
 	const struct xt_tcp *tcpinfo = matchinfo;
@@ -92,7 +92,7 @@ tcp_match(const struct sk_buff *skb,
 		*/
 		if (offset == 1) {
 			duprintf("Dropping evil TCP offset=1 frag.\n");
-			*hotdrop = 1;
+			*hotdrop = true;
 		}
 		/* Must not be a fragment. */
 		return 0;
@@ -105,7 +105,7 @@ tcp_match(const struct sk_buff *skb,
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
@@ -123,7 +123,7 @@ tcp_match(const struct sk_buff *skb,
 		return 0;
 	if (tcpinfo->option) {
 		if (th->doff * 4 < sizeof(_tcph)) {
-			*hotdrop = 1;
+			*hotdrop = true;
 			return 0;
 		}
 		if (!tcp_find_option(tcpinfo->option, skb, protoff,
@@ -157,7 +157,7 @@ udp_match(const struct sk_buff *skb,
 	  const void *matchinfo,
 	  int offset,
 	  unsigned int protoff,
-	  int *hotdrop)
+	  bool *hotdrop)
 {
 	struct udphdr _udph, *uh;
 	const struct xt_udp *udpinfo = matchinfo;
@@ -171,7 +171,7 @@ udp_match(const struct sk_buff *skb,
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil UDP tinygram.\n");
-		*hotdrop = 1;
+		*hotdrop = true;
 		return 0;
 	}
 
-- 
cgit v0.10.2


From 1d93a9cbad608f6398ba6c5b588c504ccd35a2ca Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:15:35 -0700
Subject: [NETFILTER]: x_tables: switch xt_match->match to bool

Switch the return type of the xt_match->match callbacks, and of the
related helper functions, from int to bool.

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index b8577d1..304fce3 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -141,14 +141,14 @@ struct xt_match
 	/* Arguments changed since 2.6.9, as this must now handle
 	   non-linear skb, using skb_header_pointer and
 	   skb_ip_make_writable. */
-	int (*match)(const struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     const struct xt_match *match,
-		     const void *matchinfo,
-		     int offset,
-		     unsigned int protoff,
-		     bool *hotdrop);
+	bool (*match)(const struct sk_buff *skb,
+		      const struct net_device *in,
+		      const struct net_device *out,
+		      const struct xt_match *match,
+		      const void *matchinfo,
+		      int offset,
+		      unsigned int protoff,
+		      bool *hotdrop);
 
 	/* Called when user tries to insert an entry of this type. */
 	/* Should return true or false. */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e2a8938..b9c792d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -183,19 +183,19 @@ ipt_error(struct sk_buff **pskb,
 }
 
 static inline
-int do_match(struct ipt_entry_match *m,
-	     const struct sk_buff *skb,
-	     const struct net_device *in,
-	     const struct net_device *out,
-	     int offset,
-	     bool *hotdrop)
+bool do_match(struct ipt_entry_match *m,
+	      const struct sk_buff *skb,
+	      const struct net_device *in,
+	      const struct net_device *out,
+	      int offset,
+	      bool *hotdrop)
 {
 	/* Stop iteration if it doesn't match */
 	if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
 				      offset, ip_hdrlen(skb), hotdrop))
-		return 1;
+		return true;
 	else
-		return 0;
+		return false;
 }
 
 static inline struct ipt_entry *
@@ -2105,16 +2105,16 @@ void ipt_unregister_table(struct xt_table *table)
 }
 
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
-static inline int
+static inline bool
 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 		     u_int8_t type, u_int8_t code,
-		     int invert)
+		     bool invert)
 {
 	return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
 		^ invert;
 }
 
-static int
+static bool
 icmp_match(const struct sk_buff *skb,
 	   const struct net_device *in,
 	   const struct net_device *out,
@@ -2129,7 +2129,7 @@ icmp_match(const struct sk_buff *skb,
 
 	/* Must not be a fragment. */
 	if (offset)
-		return 0;
+		return false;
 
 	ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
 	if (ic == NULL) {
@@ -2138,7 +2138,7 @@ icmp_match(const struct sk_buff *skb,
 		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	return icmp_type_code_match(icmpinfo->type,
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index a9a9b75..abea446 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -22,19 +22,19 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_DESCRIPTION("iptables addrtype match");
 
-static inline int match_type(__be32 addr, u_int16_t mask)
+static inline bool match_type(__be32 addr, u_int16_t mask)
 {
 	return !!(mask & (1 << inet_addr_type(addr)));
 }
 
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in, const struct net_device *out,
-		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, bool *hotdrop)
+static bool match(const struct sk_buff *skb,
+		  const struct net_device *in, const struct net_device *out,
+		  const struct xt_match *match, const void *matchinfo,
+		  int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_addrtype_info *info = matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
-	int ret = 1;
+	bool ret = true;
 
 	if (info->source)
 		ret &= match_type(iph->saddr, info->source)^info->invert_source;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 9a244e4..3da39ee 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -25,10 +25,10 @@ MODULE_DESCRIPTION("iptables AH SPI match module");
 #endif
 
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
-static inline int
-spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
-	int r=0;
+	bool r;
 	duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
 		min,spi,max);
 	r=(spi >= min && spi <= max) ^ invert;
@@ -36,7 +36,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
 	return r;
 }
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -51,7 +51,7 @@ match(const struct sk_buff *skb,
 
 	/* Must not be a fragment. */
 	if (offset)
-		return 0;
+		return false;
 
 	ah = skb_header_pointer(skb, protoff,
 				sizeof(_ahdr), &_ahdr);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index a47f374..ba3a17e 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -22,15 +22,15 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("iptables ECN matching module");
 MODULE_LICENSE("GPL");
 
-static inline int match_ip(const struct sk_buff *skb,
-			   const struct ipt_ecn_info *einfo)
+static inline bool match_ip(const struct sk_buff *skb,
+			    const struct ipt_ecn_info *einfo)
 {
 	return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
 }
 
-static inline int match_tcp(const struct sk_buff *skb,
-			    const struct ipt_ecn_info *einfo,
-			    bool *hotdrop)
+static inline bool match_tcp(const struct sk_buff *skb,
+			     const struct ipt_ecn_info *einfo,
+			     bool *hotdrop)
 {
 	struct tcphdr _tcph, *th;
 
@@ -40,51 +40,51 @@ static inline int match_tcp(const struct sk_buff *skb,
 	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		*hotdrop = false;
-		return 0;
+		return false;
 	}
 
 	if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
 		if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
 			if (th->ece == 1)
-				return 0;
+				return false;
 		} else {
 			if (th->ece == 0)
-				return 0;
+				return false;
 		}
 	}
 
 	if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
 		if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
 			if (th->cwr == 1)
-				return 0;
+				return false;
 		} else {
 			if (th->cwr == 0)
-				return 0;
+				return false;
 		}
 	}
 
-	return 1;
+	return true;
 }
 
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in, const struct net_device *out,
-		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, bool *hotdrop)
+static bool match(const struct sk_buff *skb,
+		  const struct net_device *in, const struct net_device *out,
+		  const struct xt_match *match, const void *matchinfo,
+		  int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 
 	if (info->operation & IPT_ECN_OP_MATCH_IP)
 		if (!match_ip(skb, info))
-			return 0;
+			return false;
 
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
 		if (ip_hdr(skb)->protocol != IPPROTO_TCP)
-			return 0;
+			return false;
 		if (!match_tcp(skb, info, hotdrop))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static int checkentry(const char *tablename, const void *ip_void,
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index 86f225c..b266d98 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -23,7 +23,7 @@ MODULE_DESCRIPTION("iptables arbitrary IP range match module");
 #define DEBUGP(format, args...)
 #endif
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -44,7 +44,7 @@ match(const struct sk_buff *skb,
 				info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
 				NIPQUAD(info->src.min_ip),
 				NIPQUAD(info->src.max_ip));
-			return 0;
+			return false;
 		}
 	}
 	if (info->flags & IPRANGE_DST) {
@@ -57,10 +57,10 @@ match(const struct sk_buff *skb,
 				info->flags & IPRANGE_DST_INV ? "(INV) " : "",
 				NIPQUAD(info->dst.min_ip),
 				NIPQUAD(info->dst.max_ip));
-			return 0;
+			return false;
 		}
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_match iprange_match = {
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 92be562..8f441ce 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables owner match");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -34,21 +34,21 @@ match(const struct sk_buff *skb,
 	const struct ipt_owner_info *info = matchinfo;
 
 	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
-		return 0;
+		return false;
 
 	if(info->match & IPT_OWNER_UID) {
 		if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
 		    !!(info->invert & IPT_OWNER_UID))
-			return 0;
+			return false;
 	}
 
 	if(info->match & IPT_OWNER_GID) {
 		if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
 		    !!(info->invert & IPT_OWNER_GID))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static int
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 81f1a01..2e513ed 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -169,7 +169,7 @@ static void recent_table_flush(struct recent_table *t)
 	}
 }
 
-static int
+static bool
 ipt_recent_match(const struct sk_buff *skb,
 		 const struct net_device *in, const struct net_device *out,
 		 const struct xt_match *match, const void *matchinfo,
@@ -180,7 +180,7 @@ ipt_recent_match(const struct sk_buff *skb,
 	struct recent_entry *e;
 	__be32 addr;
 	u_int8_t ttl;
-	int ret = info->invert;
+	bool ret = info->invert;
 
 	if (info->side == IPT_RECENT_DEST)
 		addr = ip_hdr(skb)->daddr;
@@ -202,15 +202,15 @@ ipt_recent_match(const struct sk_buff *skb,
 		e = recent_entry_init(t, addr, ttl);
 		if (e == NULL)
 			*hotdrop = true;
-		ret ^= 1;
+		ret = !ret;
 		goto out;
 	}
 
 	if (info->check_set & IPT_RECENT_SET)
-		ret ^= 1;
+		ret = !ret;
 	else if (info->check_set & IPT_RECENT_REMOVE) {
 		recent_entry_remove(t, e);
-		ret ^= 1;
+		ret = !ret;
 	} else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) {
 		unsigned long t = jiffies - info->seconds * HZ;
 		unsigned int i, hits = 0;
@@ -219,7 +219,7 @@ ipt_recent_match(const struct sk_buff *skb,
 			if (info->seconds && time_after(t, e->stamps[i]))
 				continue;
 			if (++hits >= info->hit_count) {
-				ret ^= 1;
+				ret = !ret;
 				break;
 			}
 		}
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 803ed4c..67699ae 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -18,7 +18,7 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("iptables TOS match module");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index e7316b2..82fe4ea 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -18,10 +18,10 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("IP tables TTL matching module");
 MODULE_LICENSE("GPL");
 
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in, const struct net_device *out,
-		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, bool *hotdrop)
+static bool match(const struct sk_buff *skb,
+		  const struct net_device *in, const struct net_device *out,
+		  const struct xt_match *match, const void *matchinfo,
+		  int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_ttl_info *info = matchinfo;
 	const u8 ttl = ip_hdr(skb)->ttl;
@@ -42,10 +42,10 @@ static int match(const struct sk_buff *skb,
 		default:
 			printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
 				info->mode);
-			return 0;
+			return false;
 	}
 
-	return 0;
+	return false;
 }
 
 static struct xt_match ttl_match = {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 13c66a7..31f42e8 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -96,7 +96,7 @@ ip6t_ext_hdr(u8 nexthdr)
 }
 
 /* Returns whether matches rule or not. */
-static inline int
+static inline bool
 ip6_packet_match(const struct sk_buff *skb,
 		 const char *indev,
 		 const char *outdev,
@@ -122,7 +122,7 @@ ip6_packet_match(const struct sk_buff *skb,
 		dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
 			ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
 			ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
-		return 0;
+		return false;
 	}
 
 	/* Look for ifname matches; this should unroll nicely. */
@@ -136,7 +136,7 @@ ip6_packet_match(const struct sk_buff *skb,
 		dprintf("VIA in mismatch (%s vs %s).%s\n",
 			indev, ip6info->iniface,
 			ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
-		return 0;
+		return false;
 	}
 
 	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
@@ -149,7 +149,7 @@ ip6_packet_match(const struct sk_buff *skb,
 		dprintf("VIA out mismatch (%s vs %s).%s\n",
 			outdev, ip6info->outiface,
 			ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
-		return 0;
+		return false;
 	}
 
 /* ... might want to do something with class and flowlabel here ... */
@@ -163,7 +163,7 @@ ip6_packet_match(const struct sk_buff *skb,
 		if (protohdr < 0) {
 			if (_frag_off == 0)
 				*hotdrop = true;
-			return 0;
+			return false;
 		}
 		*fragoff = _frag_off;
 
@@ -174,17 +174,17 @@ ip6_packet_match(const struct sk_buff *skb,
 
 		if (ip6info->proto == protohdr) {
 			if(ip6info->invflags & IP6T_INV_PROTO) {
-				return 0;
+				return false;
 			}
-			return 1;
+			return true;
 		}
 
 		/* We need match for the '-p all', too! */
 		if ((ip6info->proto != 0) &&
 			!(ip6info->invflags & IP6T_INV_PROTO))
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
 }
 
 /* should be ip6 safe */
@@ -219,20 +219,20 @@ ip6t_error(struct sk_buff **pskb,
 }
 
 static inline
-int do_match(struct ip6t_entry_match *m,
-	     const struct sk_buff *skb,
-	     const struct net_device *in,
-	     const struct net_device *out,
-	     int offset,
-	     unsigned int protoff,
-	     bool *hotdrop)
+bool do_match(struct ip6t_entry_match *m,
+	      const struct sk_buff *skb,
+	      const struct net_device *in,
+	      const struct net_device *out,
+	      int offset,
+	      unsigned int protoff,
+	      bool *hotdrop)
 {
 	/* Stop iteration if it doesn't match */
 	if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
 				      offset, protoff, hotdrop))
-		return 1;
+		return true;
 	else
-		return 0;
+		return false;
 }
 
 static inline struct ip6t_entry *
@@ -1291,7 +1291,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 		^ invert;
 }
 
-static int
+static bool
 icmp6_match(const struct sk_buff *skb,
 	   const struct net_device *in,
 	   const struct net_device *out,
@@ -1306,7 +1306,7 @@ icmp6_match(const struct sk_buff *skb,
 
 	/* Must not be a fragment. */
 	if (offset)
-		return 0;
+		return false;
 
 	ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp);
 	if (ic == NULL) {
@@ -1314,7 +1314,7 @@ icmp6_match(const struct sk_buff *skb,
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil ICMP tinygram.\n");
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	return icmp6_type_code_match(icmpinfo->type,
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 27b7bd2..607c2eb 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -30,10 +30,10 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 #endif
 
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
-static inline int
-spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
-	int r=0;
+	bool r;
 	DEBUGP("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
 	       min,spi,max);
 	r = (spi >= min && spi <= max) ^ invert;
@@ -41,7 +41,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
 	return r;
 }
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -61,13 +61,13 @@ match(const struct sk_buff *skb,
 	if (err < 0) {
 		if (err != -ENOENT)
 			*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
 	if (ah == NULL) {
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	hdrlen = (ah->hdrlen + 2) << 2;
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 69e79e1..bebb12a 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -19,7 +19,7 @@ MODULE_DESCRIPTION("IPv6 EUI64 address checking match");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -36,7 +36,7 @@ match(const struct sk_buff *skb,
 	      (skb_mac_header(skb) + ETH_HLEN) <= skb->data) &&
 	    offset != 0) {
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	memset(eui64, 0, sizeof(eui64));
@@ -55,11 +55,11 @@ match(const struct sk_buff *skb,
 				i++;
 
 			if (i == 8)
-				return 1;
+				return true;
 		}
 	}
 
-	return 0;
+	return false;
 }
 
 static struct xt_match eui64_match = {
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 740fdca..0ed5fbc 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -29,10 +29,10 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 #endif
 
 /* Returns 1 if the id is matched by the range, 0 otherwise */
-static inline int
-id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
+static inline bool
+id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 {
-	int r = 0;
+	bool r;
 	DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ',
 	       min, id, max);
 	r = (id >= min && id <= max) ^ invert;
@@ -40,7 +40,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
 	return r;
 }
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -59,13 +59,13 @@ match(const struct sk_buff *skb,
 	if (err < 0) {
 		if (err != -ENOENT)
 			*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
 	if (fh == NULL) {
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	DEBUGP("INFO %04X ", fh->frag_off);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 5633de1..4b05393 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -47,7 +47,7 @@ MODULE_ALIAS("ip6t_dst");
  *	5	-> RTALERT 2 x x
  */
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -62,7 +62,7 @@ match(const struct sk_buff *skb,
 	unsigned int temp;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
-	unsigned int ret = 0;
+	bool ret = false;
 	u8 _opttype, *tp = NULL;
 	u8 _optlen, *lp = NULL;
 	unsigned int optlen;
@@ -72,19 +72,19 @@ match(const struct sk_buff *skb,
 	if (err < 0) {
 		if (err != -ENOENT)
 			*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
 	if (oh == NULL) {
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	hdrlen = ipv6_optlen(oh);
 	if (skb->len - ptr < hdrlen) {
 		/* Packet smaller than it's length field */
-		return 0;
+		return false;
 	}
 
 	DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
@@ -123,7 +123,7 @@ match(const struct sk_buff *skb,
 				DEBUGP("Tbad %02X %02X\n",
 				       *tp,
 				       (optinfo->opts[temp] & 0xFF00) >> 8);
-				return 0;
+				return false;
 			} else {
 				DEBUGP("Tok ");
 			}
@@ -144,7 +144,7 @@ match(const struct sk_buff *skb,
 				if (spec_len != 0x00FF && spec_len != *lp) {
 					DEBUGP("Lbad %02X %04X\n", *lp,
 					       spec_len);
-					return 0;
+					return false;
 				}
 				DEBUGP("Lok ");
 				optlen = *lp + 2;
@@ -167,10 +167,10 @@ match(const struct sk_buff *skb,
 		if (temp == optinfo->optsnr)
 			return ret;
 		else
-			return 0;
+			return false;
 	}
 
-	return 0;
+	return false;
 }
 
 /* Called when user tries to insert an entry of this type. */
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index cbf49cf..b933e84 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -19,10 +19,10 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
 MODULE_DESCRIPTION("IP tables Hop Limit matching module");
 MODULE_LICENSE("GPL");
 
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in, const struct net_device *out,
-		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, bool *hotdrop)
+static bool match(const struct sk_buff *skb,
+		  const struct net_device *in, const struct net_device *out,
+		  const struct xt_match *match, const void *matchinfo,
+		  int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ip6t_hl_info *info = matchinfo;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -43,10 +43,10 @@ static int match(const struct sk_buff *skb,
 		default:
 			printk(KERN_WARNING "ip6t_hl: unknown mode %d\n",
 				info->mode);
-			return 0;
+			return false;
 	}
 
-	return 0;
+	return false;
 }
 
 static struct xt_match hl_match = {
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 469dec2..3222e89 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -26,7 +26,7 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("IPv6 headers match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-static int
+static bool
 ipv6header_match(const struct sk_buff *skb,
 		 const struct net_device *in,
 		 const struct net_device *out,
@@ -58,7 +58,7 @@ ipv6header_match(const struct sk_buff *skb,
 
 		/* Is there enough space for the next ext header? */
 		if (len < (int)sizeof(struct ipv6_opt_hdr))
-			return 0;
+			return false;
 		/* No more exthdr -> evaluate */
 		if (nexthdr == NEXTHDR_NONE) {
 			temp |= MASK_NONE;
@@ -99,7 +99,7 @@ ipv6header_match(const struct sk_buff *skb,
 			temp |= MASK_DSTOPTS;
 			break;
 		default:
-			return 0;
+			return false;
 			break;
 		}
 
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index c27647b..ddffe03 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -31,16 +31,13 @@ MODULE_LICENSE("GPL");
 #endif
 
 /* Returns 1 if the type is matched by the range, 0 otherwise */
-static inline int
-type_match(u_int8_t min, u_int8_t max, u_int8_t type, int invert)
+static inline bool
+type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
 {
-	int ret;
-
-	ret = (type >= min && type <= max) ^ invert;
-	return ret;
+	return (type >= min && type <= max) ^ invert;
 }
 
-static int
+static bool
 match(const struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
@@ -55,7 +52,7 @@ match(const struct sk_buff *skb,
 
 	/* Must not be a fragment. */
 	if (offset)
-		return 0;
+		return false;
 
 	mh = skb_header_pointer(skb, protoff, sizeof(_mh), &_mh);
 	if (mh == NULL) {
@@ -63,14 +60,14 @@ match(const struct sk_buff *skb,
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil MH tinygram.\n");
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
 		duprintf("Dropping invalid MH Payload Proto: %u\n",
 			 mh->ip6mh_proto);
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type,
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index f90f7c3..cadd0a6 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -23,7 +23,7 @@ MODULE_DESCRIPTION("IP6 tables owner matching module");
 MODULE_LICENSE("GPL");
 
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -36,21 +36,21 @@ match(const struct sk_buff *skb,
 	const struct ip6t_owner_info *info = matchinfo;
 
 	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
-		return 0;
+		return false;
 
 	if (info->match & IP6T_OWNER_UID) {
 		if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
 		    !!(info->invert & IP6T_OWNER_UID))
-			return 0;
+			return false;
 	}
 
 	if (info->match & IP6T_OWNER_GID) {
 		if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
 		    !!(info->invert & IP6T_OWNER_GID))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static int
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 2bb8821..7966f4a 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -31,10 +31,10 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 #endif
 
 /* Returns 1 if the id is matched by the range, 0 otherwise */
-static inline int
-segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
+static inline bool
+segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 {
-	int r = 0;
+	bool r;
 	DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",
 	       invert ? '!' : ' ', min, id, max);
 	r = (id >= min && id <= max) ^ invert;
@@ -42,7 +42,7 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
 	return r;
 }
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -57,7 +57,7 @@ match(const struct sk_buff *skb,
 	unsigned int temp;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
-	unsigned int ret = 0;
+	bool ret = false;
 	struct in6_addr *ap, _addr;
 	int err;
 
@@ -65,19 +65,19 @@ match(const struct sk_buff *skb,
 	if (err < 0) {
 		if (err != -ENOENT)
 			*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
 	if (rh == NULL) {
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	hdrlen = ipv6_optlen(rh);
 	if (skb->len - ptr < hdrlen) {
 		/* Pcket smaller than its length field */
-		return 0;
+		return false;
 	}
 
 	DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
@@ -136,7 +136,7 @@ match(const struct sk_buff *skb,
 		DEBUGP("Not strict ");
 		if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
 			DEBUGP("There isn't enough space\n");
-			return 0;
+			return false;
 		} else {
 			unsigned int i = 0;
 
@@ -164,13 +164,13 @@ match(const struct sk_buff *skb,
 			if (i == rtinfo->addrnr)
 				return ret;
 			else
-				return 0;
+				return false;
 		}
 	} else {
 		DEBUGP("Strict ");
 		if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
 			DEBUGP("There isn't enough space\n");
-			return 0;
+			return false;
 		} else {
 			DEBUGP("#%d ", rtinfo->addrnr);
 			for (temp = 0; temp < rtinfo->addrnr; temp++) {
@@ -190,11 +190,11 @@ match(const struct sk_buff *skb,
 			    (temp == (unsigned int)((hdrlen - 8) / 16)))
 				return ret;
 			else
-				return 0;
+				return false;
 		}
 	}
 
-	return 0;
+	return false;
 }
 
 /* Called when user tries to insert an entry of this type. */
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 20690ea..aa9503f 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -15,7 +15,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_comment");
 MODULE_ALIAS("ip6t_comment");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -26,7 +26,7 @@ match(const struct sk_buff *skb,
       bool *hotdrop)
 {
 	/* We always match */
-	return 1;
+	return true;
 }
 
 static struct xt_match xt_comment_match[] = {
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 8fe5775..aada7b7 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -15,7 +15,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
 MODULE_ALIAS("ipt_connbytes");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -35,7 +35,7 @@ match(const struct sk_buff *skb,
 
 	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct)
-		return 0;
+		return false;
 	counters = ct->counters;
 
 	switch (sinfo->what) {
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 8a6d58a..3321b80 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -30,7 +30,7 @@ MODULE_DESCRIPTION("IP tables connmark match module");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_connmark");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -46,7 +46,7 @@ match(const struct sk_buff *skb,
 
 	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct)
-		return 0;
+		return false;
 
 	return (((ct->mark) & info->mask) == info->mark) ^ info->invert;
 }
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 915c730..26901f9 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -19,7 +19,7 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables connection tracking match module");
 MODULE_ALIAS("ipt_conntrack");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -54,53 +54,53 @@ match(const struct sk_buff *skb,
 		}
 		if (FWINV((statebit & sinfo->statemask) == 0,
 			  XT_CONNTRACK_STATE))
-			return 0;
+			return false;
 	}
 
 	if (ct == NULL) {
 		if (sinfo->flags & ~XT_CONNTRACK_STATE)
-			return 0;
-		return 1;
+			return false;
+		return true;
 	}
 
 	if (sinfo->flags & XT_CONNTRACK_PROTO &&
 	    FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
 		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
 		  XT_CONNTRACK_PROTO))
-		return 0;
+		return false;
 
 	if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
 	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip &
 		   sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
 		  sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
 		  XT_CONNTRACK_ORIGSRC))
-		return 0;
+		return false;
 
 	if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
 	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip &
 		   sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
 		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
 		  XT_CONNTRACK_ORIGDST))
-		return 0;
+		return false;
 
 	if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
 	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip &
 		   sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
 		  sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
 		  XT_CONNTRACK_REPLSRC))
-		return 0;
+		return false;
 
 	if (sinfo->flags & XT_CONNTRACK_REPLDST &&
 	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip &
 		   sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
 		  sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
 		  XT_CONNTRACK_REPLDST))
-		return 0;
+		return false;
 
 	if (sinfo->flags & XT_CONNTRACK_STATUS &&
 	    FWINV((ct->status & sinfo->statusmask) == 0,
 		  XT_CONNTRACK_STATUS))
-		return 0;
+		return false;
 
 	if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
 		unsigned long expires = timer_pending(&ct->timeout) ?
@@ -109,9 +109,9 @@ match(const struct sk_buff *skb,
 		if (FWINV(!(expires >= sinfo->expires_min &&
 			    expires <= sinfo->expires_max),
 			  XT_CONNTRACK_EXPIRES))
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
 }
 
 static int
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 3172e73..b0eba4e 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -31,7 +31,7 @@ MODULE_ALIAS("ipt_dccp");
 static unsigned char *dccp_optbuf;
 static DEFINE_SPINLOCK(dccp_buflock);
 
-static inline int
+static inline bool
 dccp_find_option(u_int8_t option,
 		 const struct sk_buff *skb,
 		 unsigned int protoff,
@@ -46,11 +46,11 @@ dccp_find_option(u_int8_t option,
 
 	if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) {
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	if (!optlen)
-		return 0;
+		return false;
 
 	spin_lock_bh(&dccp_buflock);
 	op = skb_header_pointer(skb, protoff + optoff, optlen, dccp_optbuf);
@@ -58,13 +58,13 @@ dccp_find_option(u_int8_t option,
 		/* If we don't have the whole header, drop packet. */
 		spin_unlock_bh(&dccp_buflock);
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	for (i = 0; i < optlen; ) {
 		if (op[i] == option) {
 			spin_unlock_bh(&dccp_buflock);
-			return 1;
+			return true;
 		}
 
 		if (op[i] < 2)
@@ -74,24 +74,24 @@ dccp_find_option(u_int8_t option,
 	}
 
 	spin_unlock_bh(&dccp_buflock);
-	return 0;
+	return false;
 }
 
 
-static inline int
+static inline bool
 match_types(const struct dccp_hdr *dh, u_int16_t typemask)
 {
 	return (typemask & (1 << dh->dccph_type));
 }
 
-static inline int
+static inline bool
 match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
 	     const struct dccp_hdr *dh, bool *hotdrop)
 {
 	return dccp_find_option(option, skb, protoff, dh, hotdrop);
 }
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -105,12 +105,12 @@ match(const struct sk_buff *skb,
 	struct dccp_hdr _dh, *dh;
 
 	if (offset)
-		return 0;
+		return false;
 
 	dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh);
 	if (dh == NULL) {
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	return  DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0])
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index c106d73..c9c6518 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -22,22 +22,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_dscp");
 MODULE_ALIAS("ip6t_dscp");
 
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in,
-		 const struct net_device *out,
-		 const struct xt_match *match,
-		 const void *matchinfo,
-		 int offset,
-		 unsigned int protoff,
-		 bool *hotdrop)
-{
-	const struct xt_dscp_info *info = matchinfo;
-	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
-
-	return (dscp == info->dscp) ^ !!info->invert;
-}
-
-static int match6(const struct sk_buff *skb,
+static bool match(const struct sk_buff *skb,
 		  const struct net_device *in,
 		  const struct net_device *out,
 		  const struct xt_match *match,
@@ -47,6 +32,21 @@ static int match6(const struct sk_buff *skb,
 		  bool *hotdrop)
 {
 	const struct xt_dscp_info *info = matchinfo;
+	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
+
+	return (dscp == info->dscp) ^ !!info->invert;
+}
+
+static bool match6(const struct sk_buff *skb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   const struct xt_match *match,
+		   const void *matchinfo,
+		   int offset,
+		   unsigned int protoff,
+		   bool *hotdrop)
+{
+	const struct xt_dscp_info *info = matchinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	return (dscp == info->dscp) ^ !!info->invert;
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 5d3421b..1a945cb 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -31,10 +31,10 @@ MODULE_ALIAS("ip6t_esp");
 #endif
 
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
-static inline int
-spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
-	int r = 0;
+	bool r;
 	duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ',
 		 min, spi, max);
 	r = (spi >= min && spi <= max) ^ invert;
@@ -42,7 +42,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
 	return r;
 }
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -57,7 +57,7 @@ match(const struct sk_buff *skb,
 
 	/* Must not be a fragment. */
 	if (offset)
-		return 0;
+		return false;
 
 	eh = skb_header_pointer(skb, protoff, sizeof(_esp), &_esp);
 	if (eh == NULL) {
@@ -66,7 +66,7 @@ match(const struct sk_buff *skb,
 		 */
 		duprintf("Dropping evil ESP tinygram.\n");
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	return spi_match(espinfo->spis[0], espinfo->spis[1], ntohl(eh->spi),
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index cd5cba6..21597b7 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -94,7 +94,8 @@ static DEFINE_MUTEX(hlimit_mutex);	/* additional checkentry protection */
 static HLIST_HEAD(hashlimit_htables);
 static struct kmem_cache *hashlimit_cachep __read_mostly;
 
-static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
+static inline bool dst_cmp(const struct dsthash_ent *ent,
+			   struct dsthash_dst *b)
 {
 	return !memcmp(&ent->dst, b, sizeof(ent->dst));
 }
@@ -227,18 +228,18 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family)
 	return 0;
 }
 
-static int select_all(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)
+static bool select_all(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)
 {
 	return 1;
 }
 
-static int select_gc(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)
+static bool select_gc(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)
 {
 	return (jiffies >= he->expires);
 }
 
 static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
-				int (*select)(struct xt_hashlimit_htable *ht,
+				bool (*select)(struct xt_hashlimit_htable *ht,
 					      struct dsthash_ent *he))
 {
 	unsigned int i;
@@ -432,7 +433,7 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst,
 	return 0;
 }
 
-static int
+static bool
 hashlimit_match(const struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
@@ -478,17 +479,17 @@ hashlimit_match(const struct sk_buff *skb,
 		/* We're underlimit. */
 		dh->rateinfo.credit -= dh->rateinfo.cost;
 		spin_unlock_bh(&hinfo->lock);
-		return 1;
+		return true;
 	}
 
 	spin_unlock_bh(&hinfo->lock);
 
 	/* default case: we're overlimit, thus don't match */
-	return 0;
+	return false;
 
 hotdrop:
 	*hotdrop = true;
-	return 0;
+	return false;
 }
 
 static int
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 0aa0907..10c629b 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -28,7 +28,7 @@ MODULE_ALIAS("ip6t_helper");
 #define DEBUGP(format, args...)
 #endif
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -42,7 +42,7 @@ match(const struct sk_buff *skb,
 	struct nf_conn *ct;
 	struct nf_conn_help *master_help;
 	enum ip_conntrack_info ctinfo;
-	int ret = info->invert;
+	bool ret = info->invert;
 
 	ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
 	if (!ct) {
@@ -67,7 +67,7 @@ match(const struct sk_buff *skb,
 		ct->master->helper->name, info->name);
 
 	if (info->name[0] == '\0')
-		ret ^= 1;
+		ret = !ret;
 	else
 		ret ^= !strncmp(master_help->helper->name, info->name,
 				strlen(master_help->helper->name));
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 621c9ee..57bcfac 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -20,7 +20,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_length");
 MODULE_ALIAS("ip6t_length");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -36,7 +36,7 @@ match(const struct sk_buff *skb,
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
 
-static int
+static bool
 match6(const struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 1133b4c..0cfe241 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -57,7 +57,7 @@ static DEFINE_SPINLOCK(limit_lock);
 
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
-static int
+static bool
 ipt_limit_match(const struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
@@ -79,11 +79,11 @@ ipt_limit_match(const struct sk_buff *skb,
 		/* We're not limited. */
 		r->credit -= r->cost;
 		spin_unlock_bh(&limit_lock);
-		return 1;
+		return true;
 	}
 
 	spin_unlock_bh(&limit_lock);
-	return 0;
+	return false;
 }
 
 /* Precision saver. */
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 0e6a286..8602202 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -24,7 +24,7 @@ MODULE_DESCRIPTION("iptables mac matching module");
 MODULE_ALIAS("ipt_mac");
 MODULE_ALIAS("ip6t_mac");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 944d1ea..10c6799 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -19,7 +19,7 @@ MODULE_DESCRIPTION("iptables mark matching module");
 MODULE_ALIAS("ipt_mark");
 MODULE_ALIAS("ip6t_mark");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 1dc53de..55feb3d 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -33,24 +33,24 @@ MODULE_ALIAS("ip6t_multiport");
 #endif
 
 /* Returns 1 if the port is matched by the test, 0 otherwise. */
-static inline int
+static inline bool
 ports_match(const u_int16_t *portlist, enum xt_multiport_flags flags,
 	    u_int8_t count, u_int16_t src, u_int16_t dst)
 {
 	unsigned int i;
 	for (i = 0; i < count; i++) {
 		if (flags != XT_MULTIPORT_DESTINATION && portlist[i] == src)
-			return 1;
+			return true;
 
 		if (flags != XT_MULTIPORT_SOURCE && portlist[i] == dst)
-			return 1;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 /* Returns 1 if the port is matched by the test, 0 otherwise. */
-static inline int
+static inline bool
 ports_match_v1(const struct xt_multiport_v1 *minfo,
 	       u_int16_t src, u_int16_t dst)
 {
@@ -67,34 +67,34 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 
 			if (minfo->flags == XT_MULTIPORT_SOURCE
 			    && src >= s && src <= e)
-				return 1 ^ minfo->invert;
+				return true ^ minfo->invert;
 			if (minfo->flags == XT_MULTIPORT_DESTINATION
 			    && dst >= s && dst <= e)
-				return 1 ^ minfo->invert;
+				return true ^ minfo->invert;
 			if (minfo->flags == XT_MULTIPORT_EITHER
 			    && ((dst >= s && dst <= e)
 				|| (src >= s && src <= e)))
-				return 1 ^ minfo->invert;
+				return true ^ minfo->invert;
 		} else {
 			/* exact port matching */
 			duprintf("src or dst matches with %d?\n", s);
 
 			if (minfo->flags == XT_MULTIPORT_SOURCE
 			    && src == s)
-				return 1 ^ minfo->invert;
+				return true ^ minfo->invert;
 			if (minfo->flags == XT_MULTIPORT_DESTINATION
 			    && dst == s)
-				return 1 ^ minfo->invert;
+				return true ^ minfo->invert;
 			if (minfo->flags == XT_MULTIPORT_EITHER
 			    && (src == s || dst == s))
-				return 1 ^ minfo->invert;
+				return true ^ minfo->invert;
 		}
 	}
 
 	return minfo->invert;
 }
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -108,7 +108,7 @@ match(const struct sk_buff *skb,
 	const struct xt_multiport *multiinfo = matchinfo;
 
 	if (offset)
-		return 0;
+		return false;
 
 	pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports);
 	if (pptr == NULL) {
@@ -117,7 +117,7 @@ match(const struct sk_buff *skb,
 		 */
 		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	return ports_match(multiinfo->ports,
@@ -125,7 +125,7 @@ match(const struct sk_buff *skb,
 			   ntohs(pptr[0]), ntohs(pptr[1]));
 }
 
-static int
+static bool
 match_v1(const struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
@@ -139,7 +139,7 @@ match_v1(const struct sk_buff *skb,
 	const struct xt_multiport_v1 *multiinfo = matchinfo;
 
 	if (offset)
-		return 0;
+		return false;
 
 	pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports);
 	if (pptr == NULL) {
@@ -148,7 +148,7 @@ match_v1(const struct sk_buff *skb,
 		 */
 		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1]));
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index a6de512..70de670 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -14,8 +14,6 @@
 #include <linux/netfilter/xt_physdev.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge.h>
-#define MATCH   1
-#define NOMATCH 0
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
@@ -23,7 +21,7 @@ MODULE_DESCRIPTION("iptables bridge physical device match module");
 MODULE_ALIAS("ipt_physdev");
 MODULE_ALIAS("ip6t_physdev");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -36,7 +34,7 @@ match(const struct sk_buff *skb,
 	int i;
 	static const char nulldevname[IFNAMSIZ];
 	const struct xt_physdev_info *info = matchinfo;
-	unsigned int ret;
+	bool ret;
 	const char *indev, *outdev;
 	struct nf_bridge_info *nf_bridge;
 
@@ -47,58 +45,58 @@ match(const struct sk_buff *skb,
 		/* Return MATCH if the invert flags of the used options are on */
 		if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
 		    !(info->invert & XT_PHYSDEV_OP_BRIDGED))
-			return NOMATCH;
+			return false;
 		if ((info->bitmask & XT_PHYSDEV_OP_ISIN) &&
 		    !(info->invert & XT_PHYSDEV_OP_ISIN))
-			return NOMATCH;
+			return false;
 		if ((info->bitmask & XT_PHYSDEV_OP_ISOUT) &&
 		    !(info->invert & XT_PHYSDEV_OP_ISOUT))
-			return NOMATCH;
+			return false;
 		if ((info->bitmask & XT_PHYSDEV_OP_IN) &&
 		    !(info->invert & XT_PHYSDEV_OP_IN))
-			return NOMATCH;
+			return false;
 		if ((info->bitmask & XT_PHYSDEV_OP_OUT) &&
 		    !(info->invert & XT_PHYSDEV_OP_OUT))
-			return NOMATCH;
-		return MATCH;
+			return false;
+		return true;
 	}
 
 	/* This only makes sense in the FORWARD and POSTROUTING chains */
 	if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
 	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
 	    !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
-		return NOMATCH;
+		return false;
 
 	if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
 	    (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
 	    (info->bitmask & XT_PHYSDEV_OP_ISOUT &&
 	    (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
-		return NOMATCH;
+		return false;
 
 	if (!(info->bitmask & XT_PHYSDEV_OP_IN))
 		goto match_outdev;
 	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
+	for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) {
 		ret |= (((const unsigned int *)indev)[i]
 			^ ((const unsigned int *)info->physindev)[i])
 			& ((const unsigned int *)info->in_mask)[i];
 	}
 
-	if ((ret == 0) ^ !(info->invert & XT_PHYSDEV_OP_IN))
-		return NOMATCH;
+	if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
+		return false;
 
 match_outdev:
 	if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
-		return MATCH;
+		return true;
 	outdev = nf_bridge->physoutdev ?
 		 nf_bridge->physoutdev->name : nulldevname;
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
+	for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) {
 		ret |= (((const unsigned int *)outdev)[i]
 			^ ((const unsigned int *)info->physoutdev)[i])
 			& ((const unsigned int *)info->out_mask)[i];
 	}
 
-	return (ret != 0) ^ !(info->invert & XT_PHYSDEV_OP_OUT);
+	return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT);
 }
 
 static int
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 692581f..6323972 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -21,7 +21,7 @@ MODULE_DESCRIPTION("IP tables match to match on linklayer packet type");
 MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
-static int match(const struct sk_buff *skb,
+static bool match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
       const struct xt_match *match,
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 6878482..0aa487b 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -20,7 +20,7 @@ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_DESCRIPTION("Xtables IPsec policy matching module");
 MODULE_LICENSE("GPL");
 
-static inline int
+static inline bool
 xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m,
 	    const union xt_policy_addr *a2, unsigned short family)
 {
@@ -30,10 +30,10 @@ xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m,
 	case AF_INET6:
 		return !ipv6_masked_addr_cmp(&a1->a6, &m->a6, &a2->a6);
 	}
-	return 0;
+	return false;
 }
 
-static inline int
+static inline bool
 match_xfrm_state(struct xfrm_state *x, const struct xt_policy_elem *e,
 		 unsigned short family)
 {
@@ -108,14 +108,14 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 	return strict ? i == info->len : 0;
 }
 
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in,
-		 const struct net_device *out,
-		 const struct xt_match *match,
-		 const void *matchinfo,
-		 int offset,
-		 unsigned int protoff,
-		 bool *hotdrop)
+static bool match(const struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  const struct xt_match *match,
+		  const void *matchinfo,
+		  int offset,
+		  unsigned int protoff,
+		  bool *hotdrop)
 {
 	const struct xt_policy_info *info = matchinfo;
 	int ret;
@@ -126,9 +126,9 @@ static int match(const struct sk_buff *skb,
 		ret = match_policy_out(skb, info, match->family);
 
 	if (ret < 0)
-		ret = info->flags & XT_POLICY_MATCH_NONE ? 1 : 0;
+		ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
 	else if (info->flags & XT_POLICY_MATCH_NONE)
-		ret = 0;
+		ret = false;
 
 	return ret;
 }
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 53c71ac..6091347 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -16,19 +16,19 @@ MODULE_ALIAS("ip6t_quota");
 
 static DEFINE_SPINLOCK(quota_lock);
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in, const struct net_device *out,
       const struct xt_match *match, const void *matchinfo,
       int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct xt_quota_info *q = ((struct xt_quota_info *)matchinfo)->master;
-	int ret = q->flags & XT_QUOTA_INVERT ? 1 : 0;
+	bool ret = q->flags & XT_QUOTA_INVERT;
 
 	spin_lock_bh(&quota_lock);
 	if (q->quota >= skb->len) {
 		q->quota -= skb->len;
-		ret ^= 1;
+		ret = !ret;
 	} else {
 		/* we do not allow even small packets from now on */
 		q->quota = 0;
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 41451f5..ad82c13 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("X_tables realm match");
 MODULE_ALIAS("ipt_realm");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index e581afe..a118a4c 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_sctp");
 #define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
 					      || (!!((invflag) & (option)) ^ (cond)))
 
-static int
+static bool
 match_flags(const struct xt_sctp_flag_info *flag_info,
 	    const int flag_count,
 	    u_int8_t chunktype,
@@ -37,10 +37,10 @@ match_flags(const struct xt_sctp_flag_info *flag_info,
 		}
 	}
 
-	return 1;
+	return true;
 }
 
-static inline int
+static inline bool
 match_packet(const struct sk_buff *skb,
 	     unsigned int offset,
 	     const u_int32_t *chunkmap,
@@ -65,7 +65,7 @@ match_packet(const struct sk_buff *skb,
 		if (sch == NULL || sch->length == 0) {
 			duprintf("Dropping invalid SCTP packet.\n");
 			*hotdrop = true;
-			return 0;
+			return false;
 		}
 
 		duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n",
@@ -80,7 +80,7 @@ match_packet(const struct sk_buff *skb,
 			case SCTP_CHUNK_MATCH_ANY:
 				if (match_flags(flag_info, flag_count,
 					sch->type, sch->flags)) {
-					return 1;
+					return true;
 				}
 				break;
 
@@ -94,14 +94,14 @@ match_packet(const struct sk_buff *skb,
 			case SCTP_CHUNK_MATCH_ONLY:
 				if (!match_flags(flag_info, flag_count,
 					sch->type, sch->flags)) {
-					return 0;
+					return false;
 				}
 				break;
 			}
 		} else {
 			switch (chunk_match_type) {
 			case SCTP_CHUNK_MATCH_ONLY:
-				return 0;
+				return false;
 			}
 		}
 	} while (offset < skb->len);
@@ -110,16 +110,16 @@ match_packet(const struct sk_buff *skb,
 	case SCTP_CHUNK_MATCH_ALL:
 		return SCTP_CHUNKMAP_IS_CLEAR(chunkmap);
 	case SCTP_CHUNK_MATCH_ANY:
-		return 0;
+		return false;
 	case SCTP_CHUNK_MATCH_ONLY:
-		return 1;
+		return true;
 	}
 
 	/* This will never be reached, but required to stop compiler whine */
-	return 0;
+	return false;
 }
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -134,14 +134,14 @@ match(const struct sk_buff *skb,
 
 	if (offset) {
 		duprintf("Dropping non-first fragment.. FIXME\n");
-		return 0;
+		return false;
 	}
 
 	sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh);
 	if (sh == NULL) {
 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 	duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
 
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 74fe069..f77f74a 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -20,7 +20,7 @@ MODULE_DESCRIPTION("ip[6]_tables connection tracking state match module");
 MODULE_ALIAS("ipt_state");
 MODULE_ALIAS("ip6t_state");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 4e5ed81..989924f 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -24,26 +24,26 @@ MODULE_ALIAS("ip6t_statistic");
 
 static DEFINE_SPINLOCK(nth_lock);
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in, const struct net_device *out,
       const struct xt_match *match, const void *matchinfo,
       int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
-	int ret = info->flags & XT_STATISTIC_INVERT ? 1 : 0;
+	bool ret = info->flags & XT_STATISTIC_INVERT;
 
 	switch (info->mode) {
 	case XT_STATISTIC_MODE_RANDOM:
 		if ((net_random() & 0x7FFFFFFF) < info->u.random.probability)
-			ret ^= 1;
+			ret = !ret;
 		break;
 	case XT_STATISTIC_MODE_NTH:
 		info = info->master;
 		spin_lock_bh(&nth_lock);
 		if (info->u.nth.count++ == info->u.nth.every) {
 			info->u.nth.count = 0;
-			ret ^= 1;
+			ret = !ret;
 		}
 		spin_unlock_bh(&nth_lock);
 		break;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 7552d89..3aea43d 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -21,14 +21,14 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
 
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in,
-		 const struct net_device *out,
-		 const struct xt_match *match,
-		 const void *matchinfo,
-		 int offset,
-		 unsigned int protoff,
-		 bool *hotdrop)
+static bool match(const struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  const struct xt_match *match,
+		  const void *matchinfo,
+		  int offset,
+		  unsigned int protoff,
+		  bool *hotdrop)
 {
 	const struct xt_string_info *conf = matchinfo;
 	struct ts_state state;
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 0db4f53..e9bfd3d 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -23,7 +23,7 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables TCP MSS match module");
 MODULE_ALIAS("ipt_tcpmss");
 
-static int
+static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
@@ -78,7 +78,7 @@ out:
 
 dropit:
 	*hotdrop = true;
-	return 0;
+	return false;
 }
 
 static struct xt_match xt_tcpmss_match[] = {
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index ca9ccdd..9ecc4a5 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -27,21 +27,18 @@ MODULE_ALIAS("ip6t_tcp");
 
 
 /* Returns 1 if the port is matched by the range, 0 otherwise */
-static inline int
-port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
+static inline bool
+port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert)
 {
-	int ret;
-
-	ret = (port >= min && port <= max) ^ invert;
-	return ret;
+	return (port >= min && port <= max) ^ invert;
 }
 
-static int
+static bool
 tcp_find_option(u_int8_t option,
 		const struct sk_buff *skb,
 		unsigned int protoff,
 		unsigned int optlen,
-		int invert,
+		bool invert,
 		bool *hotdrop)
 {
 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
@@ -58,7 +55,7 @@ tcp_find_option(u_int8_t option,
 				optlen, _opt);
 	if (op == NULL) {
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	for (i = 0; i < optlen; ) {
@@ -70,7 +67,7 @@ tcp_find_option(u_int8_t option,
 	return invert;
 }
 
-static int
+static bool
 tcp_match(const struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
@@ -95,7 +92,7 @@ tcp_match(const struct sk_buff *skb,
 			*hotdrop = true;
 		}
 		/* Must not be a fragment. */
-		return 0;
+		return false;
 	}
 
 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
@@ -106,33 +103,33 @@ tcp_match(const struct sk_buff *skb,
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
 			ntohs(th->source),
 			!!(tcpinfo->invflags & XT_TCP_INV_SRCPT)))
-		return 0;
+		return false;
 	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
 			ntohs(th->dest),
 			!!(tcpinfo->invflags & XT_TCP_INV_DSTPT)))
-		return 0;
+		return false;
 	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
 		      == tcpinfo->flg_cmp,
 		      XT_TCP_INV_FLAGS))
-		return 0;
+		return false;
 	if (tcpinfo->option) {
 		if (th->doff * 4 < sizeof(_tcph)) {
 			*hotdrop = true;
-			return 0;
+			return false;
 		}
 		if (!tcp_find_option(tcpinfo->option, skb, protoff,
 				     th->doff*4 - sizeof(_tcph),
 				     tcpinfo->invflags & XT_TCP_INV_OPTION,
 				     hotdrop))
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
 }
 
 /* Called when user tries to insert an entry of this type. */
@@ -149,7 +146,7 @@ tcp_checkentry(const char *tablename,
 	return !(tcpinfo->invflags & ~XT_TCP_INV_MASK);
 }
 
-static int
+static bool
 udp_match(const struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
@@ -164,7 +161,7 @@ udp_match(const struct sk_buff *skb,
 
 	/* Must not be a fragment. */
 	if (offset)
-		return 0;
+		return false;
 
 	uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
 	if (uh == NULL) {
@@ -172,7 +169,7 @@ udp_match(const struct sk_buff *skb,
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil UDP tinygram.\n");
 		*hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	return port_match(udpinfo->spts[0], udpinfo->spts[1],
-- 
cgit v0.10.2


From ccb79bdce71f2c04cfa9bfcbaf4d37e2f963d684 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:16:00 -0700
Subject: [NETFILTER]: x_tables: switch xt_match->checkentry to bool

Switch the return type of the xt_match checkentry functions to boolean

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 304fce3..5130dd6 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -152,11 +152,11 @@ struct xt_match
 
 	/* Called when user tries to insert an entry of this type. */
 	/* Should return true or false. */
-	int (*checkentry)(const char *tablename,
-			  const void *ip,
-			  const struct xt_match *match,
-			  void *matchinfo,
-			  unsigned int hook_mask);
+	bool (*checkentry)(const char *tablename,
+			   const void *ip,
+			   const struct xt_match *match,
+			   void *matchinfo,
+			   unsigned int hook_mask);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_match *match, void *matchinfo);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b9c792d..7962306 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -152,20 +152,20 @@ ip_packet_match(const struct iphdr *ip,
 	return 1;
 }
 
-static inline int
+static inline bool
 ip_checkentry(const struct ipt_ip *ip)
 {
 	if (ip->flags & ~IPT_F_MASK) {
 		duprintf("Unknown flag bits set: %08X\n",
 			 ip->flags & ~IPT_F_MASK);
-		return 0;
+		return false;
 	}
 	if (ip->invflags & ~IPT_INV_MASK) {
 		duprintf("Unknown invflag bits set: %08X\n",
 			 ip->invflags & ~IPT_INV_MASK);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static unsigned int
@@ -2149,7 +2149,7 @@ icmp_match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 icmp_checkentry(const char *tablename,
 	   const void *info,
 	   const struct xt_match *match,
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 3da39ee..6b5b7c9 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -70,7 +70,7 @@ match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *ip_void,
 	   const struct xt_match *match,
@@ -82,9 +82,9 @@ checkentry(const char *tablename,
 	/* Must specify no unknown invflags */
 	if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
 		duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_match ah_match = {
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index ba3a17e..ba4f549 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -87,27 +87,27 @@ static bool match(const struct sk_buff *skb,
 	return true;
 }
 
-static int checkentry(const char *tablename, const void *ip_void,
-		      const struct xt_match *match,
-		      void *matchinfo, unsigned int hook_mask)
+static bool checkentry(const char *tablename, const void *ip_void,
+		       const struct xt_match *match,
+		       void *matchinfo, unsigned int hook_mask)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 	const struct ipt_ip *ip = ip_void;
 
 	if (info->operation & IPT_ECN_OP_MATCH_MASK)
-		return 0;
+		return false;
 
 	if (info->invert & IPT_ECN_OP_MATCH_MASK)
-		return 0;
+		return false;
 
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)
 	    && ip->proto != IPPROTO_TCP) {
 		printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for"
 		       " non-tcp packets\n");
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static struct xt_match ecn_match = {
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 8f441ce..deea4b8 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -51,7 +51,7 @@ match(const struct sk_buff *skb,
 	return true;
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *ip,
 	   const struct xt_match *match,
@@ -63,9 +63,9 @@ checkentry(const char *tablename,
 	if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) {
 		printk("ipt_owner: pid, sid and command matching "
 		       "not supported anymore\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_match owner_match = {
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 2e513ed..d632e0e 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -235,7 +235,7 @@ out:
 	return ret;
 }
 
-static int
+static bool
 ipt_recent_checkentry(const char *tablename, const void *ip,
 		      const struct xt_match *match, void *matchinfo,
 		      unsigned int hook_mask)
@@ -243,24 +243,24 @@ ipt_recent_checkentry(const char *tablename, const void *ip,
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
 	unsigned i;
-	int ret = 0;
+	bool ret = false;
 
 	if (hweight8(info->check_set &
 		     (IPT_RECENT_SET | IPT_RECENT_REMOVE |
 		      IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) != 1)
-		return 0;
+		return false;
 	if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) &&
 	    (info->seconds || info->hit_count))
-		return 0;
+		return false;
 	if (info->name[0] == '\0' ||
 	    strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN)
-		return 0;
+		return false;
 
 	mutex_lock(&recent_mutex);
 	t = recent_table_lookup(info->name);
 	if (t != NULL) {
 		t->refcnt++;
-		ret = 1;
+		ret = true;
 		goto out;
 	}
 
@@ -287,7 +287,7 @@ ipt_recent_checkentry(const char *tablename, const void *ip,
 	spin_lock_bh(&recent_lock);
 	list_add_tail(&t->list, &tables);
 	spin_unlock_bh(&recent_lock);
-	ret = 1;
+	ret = true;
 out:
 	mutex_unlock(&recent_mutex);
 	return ret;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 31f42e8..7fe4d29 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -188,20 +188,20 @@ ip6_packet_match(const struct sk_buff *skb,
 }
 
 /* should be ip6 safe */
-static inline int
+static inline bool
 ip6_checkentry(const struct ip6t_ip6 *ipv6)
 {
 	if (ipv6->flags & ~IP6T_F_MASK) {
 		duprintf("Unknown flag bits set: %08X\n",
 			 ipv6->flags & ~IP6T_F_MASK);
-		return 0;
+		return false;
 	}
 	if (ipv6->invflags & ~IP6T_INV_MASK) {
 		duprintf("Unknown invflag bits set: %08X\n",
 			 ipv6->invflags & ~IP6T_INV_MASK);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static unsigned int
@@ -1282,10 +1282,10 @@ void ip6t_unregister_table(struct xt_table *table)
 }
 
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
-static inline int
+static inline bool
 icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 		     u_int8_t type, u_int8_t code,
-		     int invert)
+		     bool invert)
 {
 	return (type == test_type && code >= min_code && code <= max_code)
 		^ invert;
@@ -1325,7 +1325,7 @@ icmp6_match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 icmp6_checkentry(const char *tablename,
 	   const void *entry,
 	   const struct xt_match *match,
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 607c2eb..8fc00bd 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -103,7 +103,7 @@ match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 checkentry(const char *tablename,
 	  const void *entry,
 	  const struct xt_match *match,
@@ -114,9 +114,9 @@ checkentry(const char *tablename,
 
 	if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
 		DEBUGP("ip6t_ah: unknown flags %X\n", ahinfo->invflags);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_match ah_match = {
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 0ed5fbc..f0aed89 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -120,7 +120,7 @@ match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *ip,
 	   const struct xt_match *match,
@@ -131,9 +131,9 @@ checkentry(const char *tablename,
 
 	if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
 		DEBUGP("ip6t_frag: unknown flags %X\n", fraginfo->invflags);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_match frag_match = {
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 4b05393..6fdd797 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -174,7 +174,7 @@ match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *entry,
 	   const struct xt_match *match,
@@ -185,9 +185,9 @@ checkentry(const char *tablename,
 
 	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
 		DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_match opts_match[] = {
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 3222e89..5ba6ef0 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -124,7 +124,7 @@ ipv6header_match(const struct sk_buff *skb,
 	}
 }
 
-static int
+static bool
 ipv6header_checkentry(const char *tablename,
 		      const void *ip,
 		      const struct xt_match *match,
@@ -136,9 +136,9 @@ ipv6header_checkentry(const char *tablename,
 	/* invflags is 0 or 0xff in hard mode */
 	if ((!info->modeflag) && info->invflags != 0x00 &&
 	    info->invflags != 0xFF)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 static struct xt_match ip6t_ipv6header_match = {
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index ddffe03..a3008b4 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -75,7 +75,7 @@ match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 mh_checkentry(const char *tablename,
 	      const void *entry,
 	      const struct xt_match *match,
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index cadd0a6..8cb6c94 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -53,7 +53,7 @@ match(const struct sk_buff *skb,
 	return true;
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *ip,
 	   const struct xt_match *match,
@@ -65,9 +65,9 @@ checkentry(const char *tablename,
 	if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
 		printk("ipt_owner: pid and sid matching "
 		       "not supported anymore\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_match owner_match = {
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 7966f4a..e991ed4 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -198,7 +198,7 @@ match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *entry,
 	   const struct xt_match *match,
@@ -209,17 +209,17 @@ checkentry(const char *tablename,
 
 	if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
 		DEBUGP("ip6t_rt: unknown flags %X\n", rtinfo->invflags);
-		return 0;
+		return false;
 	}
 	if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
 	    (!(rtinfo->flags & IP6T_RT_TYP) ||
 	     (rtinfo->rt_type != 0) ||
 	     (rtinfo->invflags & IP6T_RT_INV_TYP))) {
 		DEBUGP("`--rt-type 0' required before `--rt-0-*'");
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static struct xt_match rt_match = {
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index aada7b7..1254178 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -95,31 +95,31 @@ match(const struct sk_buff *skb,
 		return (what >= sinfo->count.from);
 }
 
-static int check(const char *tablename,
-		 const void *ip,
-		 const struct xt_match *match,
-		 void *matchinfo,
-		 unsigned int hook_mask)
+static bool check(const char *tablename,
+		  const void *ip,
+		  const struct xt_match *match,
+		  void *matchinfo,
+		  unsigned int hook_mask)
 {
 	const struct xt_connbytes_info *sinfo = matchinfo;
 
 	if (sinfo->what != XT_CONNBYTES_PKTS &&
 	    sinfo->what != XT_CONNBYTES_BYTES &&
 	    sinfo->what != XT_CONNBYTES_AVGPKT)
-		return 0;
+		return false;
 
 	if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL &&
 	    sinfo->direction != XT_CONNBYTES_DIR_REPLY &&
 	    sinfo->direction != XT_CONNBYTES_DIR_BOTH)
-		return 0;
+		return false;
 
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
 				    "proto=%d\n", match->family);
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static void
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 3321b80..94d5251 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -51,7 +51,7 @@ match(const struct sk_buff *skb,
 	return (((ct->mark) & info->mask) == info->mark) ^ info->invert;
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *ip,
 	   const struct xt_match *match,
@@ -62,14 +62,14 @@ checkentry(const char *tablename,
 
 	if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
 		printk(KERN_WARNING "connmark: only support 32bit mark\n");
-		return 0;
+		return false;
 	}
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
 				    "proto=%d\n", match->family);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static void
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 26901f9..87364f5 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -114,7 +114,7 @@ match(const struct sk_buff *skb,
 	return true;
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *ip,
 	   const struct xt_match *match,
@@ -124,9 +124,9 @@ checkentry(const char *tablename,
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
 				    "proto=%d\n", match->family);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static void destroy(const struct xt_match *match, void *matchinfo)
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index b0eba4e..2489590 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -126,7 +126,7 @@ match(const struct sk_buff *skb,
 			   XT_DCCP_OPTION, info->flags, info->invflags);
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *inf,
 	   const struct xt_match *match,
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index c9c6518..35cabca 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -52,20 +52,20 @@ static bool match6(const struct sk_buff *skb,
 	return (dscp == info->dscp) ^ !!info->invert;
 }
 
-static int checkentry(const char *tablename,
-		      const void *info,
-		      const struct xt_match *match,
-		      void *matchinfo,
-		      unsigned int hook_mask)
+static bool checkentry(const char *tablename,
+		       const void *info,
+		       const struct xt_match *match,
+		       void *matchinfo,
+		       unsigned int hook_mask)
 {
 	const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
 
 	if (dscp > XT_DSCP_MAX) {
 		printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp);
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static struct xt_match xt_dscp_match[] = {
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 1a945cb..1a6ae8a 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -74,7 +74,7 @@ match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *ip_void,
 	   const struct xt_match *match,
@@ -85,10 +85,10 @@ checkentry(const char *tablename,
 
 	if (espinfo->invflags & ~XT_ESP_INV_MASK) {
 		duprintf("xt_esp: unknown flags %X\n", espinfo->invflags);
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static struct xt_match xt_esp_match[] = {
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 21597b7..a1b5996 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -492,7 +492,7 @@ hotdrop:
 	return false;
 }
 
-static int
+static bool
 hashlimit_checkentry(const char *tablename,
 		     const void *inf,
 		     const struct xt_match *match,
@@ -506,20 +506,20 @@ hashlimit_checkentry(const char *tablename,
 	    user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
 		printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
 		       r->cfg.avg, r->cfg.burst);
-		return 0;
+		return false;
 	}
 	if (r->cfg.mode == 0 ||
 	    r->cfg.mode > (XT_HASHLIMIT_HASH_DPT |
 			   XT_HASHLIMIT_HASH_DIP |
 			   XT_HASHLIMIT_HASH_SIP |
 			   XT_HASHLIMIT_HASH_SPT))
-		return 0;
+		return false;
 	if (!r->cfg.gc_interval)
-		return 0;
+		return false;
 	if (!r->cfg.expire)
-		return 0;
+		return false;
 	if (r->name[sizeof(r->name) - 1] != '\0')
-		return 0;
+		return false;
 
 	/* This is the best we've got: We cannot release and re-grab lock,
 	 * since checkentry() is called before x_tables.c grabs xt_mutex.
@@ -531,13 +531,13 @@ hashlimit_checkentry(const char *tablename,
 	r->hinfo = htable_find_get(r->name, match->family);
 	if (!r->hinfo && htable_create(r, match->family) != 0) {
 		mutex_unlock(&hlimit_mutex);
-		return 0;
+		return false;
 	}
 	mutex_unlock(&hlimit_mutex);
 
 	/* Ugly hack: For SMP, we only want to use one set */
 	r->u.master = r;
-	return 1;
+	return true;
 }
 
 static void
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 10c629b..a2688b8 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -76,21 +76,21 @@ out_unlock:
 	return ret;
 }
 
-static int check(const char *tablename,
-		 const void *inf,
-		 const struct xt_match *match,
-		 void *matchinfo,
-		 unsigned int hook_mask)
+static bool check(const char *tablename,
+		  const void *inf,
+		  const struct xt_match *match,
+		  void *matchinfo,
+		  unsigned int hook_mask)
 {
 	struct xt_helper_info *info = matchinfo;
 
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
 				    "proto=%d\n", match->family);
-		return 0;
+		return false;
 	}
 	info->name[29] = '\0';
-	return 1;
+	return true;
 }
 
 static void
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 0cfe241..2717aa6 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -98,7 +98,7 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE;
 }
 
-static int
+static bool
 ipt_limit_checkentry(const char *tablename,
 		     const void *inf,
 		     const struct xt_match *match,
@@ -112,7 +112,7 @@ ipt_limit_checkentry(const char *tablename,
 	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
 		printk("Overflow in xt_limit, try lower: %u/%u\n",
 		       r->avg, r->burst);
-		return 0;
+		return false;
 	}
 
 	/* For SMP, we only want to use one set of counters. */
@@ -125,7 +125,7 @@ ipt_limit_checkentry(const char *tablename,
 		r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
 		r->cost = user2credits(r->avg);
 	}
-	return 1;
+	return true;
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 10c6799..83ed8067 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -34,7 +34,7 @@ match(const struct sk_buff *skb,
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *entry,
 	   const struct xt_match *match,
@@ -45,9 +45,9 @@ checkentry(const char *tablename,
 
 	if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
 		printk(KERN_WARNING "mark: only supports 32bit mark\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 55feb3d..3d69d62 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -154,7 +154,7 @@ match_v1(const struct sk_buff *skb,
 	return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1]));
 }
 
-static inline int
+static inline bool
 check(u_int16_t proto,
       u_int8_t ip_invflags,
       u_int8_t match_flags,
@@ -172,7 +172,7 @@ check(u_int16_t proto,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *info,
 	   const struct xt_match *match,
@@ -186,7 +186,7 @@ checkentry(const char *tablename,
 		     multiinfo->count);
 }
 
-static int
+static bool
 checkentry_v1(const char *tablename,
 	      const void *info,
 	      const struct xt_match *match,
@@ -200,7 +200,7 @@ checkentry_v1(const char *tablename,
 		     multiinfo->count);
 }
 
-static int
+static bool
 checkentry6(const char *tablename,
 	    const void *info,
 	    const struct xt_match *match,
@@ -214,7 +214,7 @@ checkentry6(const char *tablename,
 		     multiinfo->count);
 }
 
-static int
+static bool
 checkentry6_v1(const char *tablename,
 	       const void *info,
 	       const struct xt_match *match,
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 70de670..34f0d3e 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -99,7 +99,7 @@ match_outdev:
 	return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT);
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 		       const void *ip,
 		       const struct xt_match *match,
@@ -110,7 +110,7 @@ checkentry(const char *tablename,
 
 	if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
 	    info->bitmask & ~XT_PHYSDEV_OP_MASK)
-		return 0;
+		return false;
 	if (info->bitmask & XT_PHYSDEV_OP_OUT &&
 	    (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
@@ -120,9 +120,9 @@ checkentry(const char *tablename,
 		       "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
 		       "traffic is not supported anymore.\n");
 		if (hook_mask & (1 << NF_IP_LOCAL_OUT))
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_match xt_physdev_match[] = {
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 0aa487b..1534de5 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -133,35 +133,35 @@ static bool match(const struct sk_buff *skb,
 	return ret;
 }
 
-static int checkentry(const char *tablename, const void *ip_void,
-		      const struct xt_match *match,
-		      void *matchinfo, unsigned int hook_mask)
+static bool checkentry(const char *tablename, const void *ip_void,
+		       const struct xt_match *match,
+		       void *matchinfo, unsigned int hook_mask)
 {
 	struct xt_policy_info *info = matchinfo;
 
 	if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) {
 		printk(KERN_ERR "xt_policy: neither incoming nor "
 				"outgoing policy selected\n");
-		return 0;
+		return false;
 	}
 	/* hook values are equal for IPv4 and IPv6 */
 	if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
 	    && info->flags & XT_POLICY_MATCH_OUT) {
 		printk(KERN_ERR "xt_policy: output policy not valid in "
 				"PRE_ROUTING and INPUT\n");
-		return 0;
+		return false;
 	}
 	if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
 	    && info->flags & XT_POLICY_MATCH_IN) {
 		printk(KERN_ERR "xt_policy: input policy not valid in "
 				"POST_ROUTING and OUTPUT\n");
-		return 0;
+		return false;
 	}
 	if (info->len > XT_POLICY_MAX_ELEM) {
 		printk(KERN_ERR "xt_policy: too many policy elements\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_match xt_policy_match[] = {
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 6091347..e13d62a 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -38,7 +38,7 @@ match(const struct sk_buff *skb,
 	return ret;
 }
 
-static int
+static bool
 checkentry(const char *tablename, const void *entry,
 	   const struct xt_match *match, void *matchinfo,
 	   unsigned int hook_mask)
@@ -46,10 +46,10 @@ checkentry(const char *tablename, const void *entry,
 	struct xt_quota_info *q = (struct xt_quota_info *)matchinfo;
 
 	if (q->flags & ~XT_QUOTA_MASK)
-		return 0;
+		return false;
 	/* For SMP, we only want to use one set of counters. */
 	q->master = q;
-	return 1;
+	return true;
 }
 
 static struct xt_match xt_quota_match[] = {
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index a118a4c..22df338 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -158,7 +158,7 @@ match(const struct sk_buff *skb,
 			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *inf,
 	   const struct xt_match *match,
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index f77f74a..5b9c59a 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -44,18 +44,18 @@ match(const struct sk_buff *skb,
 	return (sinfo->statemask & statebit);
 }
 
-static int check(const char *tablename,
-		 const void *inf,
-		 const struct xt_match *match,
-		 void *matchinfo,
-		 unsigned int hook_mask)
+static bool check(const char *tablename,
+		  const void *inf,
+		  const struct xt_match *match,
+		  void *matchinfo,
+		  unsigned int hook_mask)
 {
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
 				    "proto=%d\n", match->family);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static void
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 989924f..0af4289 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -52,7 +52,7 @@ match(const struct sk_buff *skb,
 	return ret;
 }
 
-static int
+static bool
 checkentry(const char *tablename, const void *entry,
 	   const struct xt_match *match, void *matchinfo,
 	   unsigned int hook_mask)
@@ -61,9 +61,9 @@ checkentry(const char *tablename, const void *entry,
 
 	if (info->mode > XT_STATISTIC_MODE_MAX ||
 	    info->flags & ~XT_STATISTIC_MASK)
-		return 0;
+		return false;
 	info->master = info;
-	return 1;
+	return true;
 }
 
 static struct xt_match xt_statistic_match[] = {
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 3aea43d..ab761b1 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -42,30 +42,30 @@ static bool match(const struct sk_buff *skb,
 
 #define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m)
 
-static int checkentry(const char *tablename,
-		      const void *ip,
-		      const struct xt_match *match,
-		      void *matchinfo,
-		      unsigned int hook_mask)
+static bool checkentry(const char *tablename,
+		       const void *ip,
+		       const struct xt_match *match,
+		       void *matchinfo,
+		       unsigned int hook_mask)
 {
 	struct xt_string_info *conf = matchinfo;
 	struct ts_config *ts_conf;
 
 	/* Damn, can't handle this case properly with iptables... */
 	if (conf->from_offset > conf->to_offset)
-		return 0;
+		return false;
 	if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0')
-		return 0;
+		return false;
 	if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
-		return 0;
+		return false;
 	ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
 				     GFP_KERNEL, TS_AUTOLOAD);
 	if (IS_ERR(ts_conf))
-		return 0;
+		return false;
 
 	conf->config = ts_conf;
 
-	return 1;
+	return true;
 }
 
 static void destroy(const struct xt_match *match, void *matchinfo)
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 9ecc4a5..0dd3022 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -133,7 +133,7 @@ tcp_match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 tcp_checkentry(const char *tablename,
 	       const void *info,
 	       const struct xt_match *match,
@@ -181,7 +181,7 @@ udp_match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
+static bool
 udp_checkentry(const char *tablename,
 	       const void *info,
 	       const struct xt_match *match,
-- 
cgit v0.10.2


From e1931b784a8de324abf310fa3b5e3f25d3988233 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:16:26 -0700
Subject: [NETFILTER]: x_tables: switch xt_target->checkentry to bool

Switch the return type of target checkentry functions to boolean.

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 5130dd6..64f425a8 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -202,11 +202,11 @@ struct xt_target
            hook_mask is a bitmask of hooks from which it can be
            called. */
 	/* Should return true or false. */
-	int (*checkentry)(const char *tablename,
-			  const void *entry,
-			  const struct xt_target *target,
-			  void *targinfo,
-			  unsigned int hook_mask);
+	bool (*checkentry)(const char *tablename,
+			   const void *entry,
+			   const struct xt_target *target,
+			   void *targinfo,
+			   unsigned int hook_mask);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_target *target, void *targinfo);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 6298d40..497a16e 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -65,7 +65,7 @@ target(struct sk_buff **pskb,
 	return mangle->target;
 }
 
-static int
+static bool
 checkentry(const char *tablename, const void *e, const struct xt_target *target,
 	   void *targinfo, unsigned int hook_mask)
 {
@@ -73,12 +73,12 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target,
 
 	if (mangle->flags & ~ARPT_MANGLE_MASK ||
 	    !(mangle->flags & ARPT_MANGLE_MASK))
-		return 0;
+		return false;
 
 	if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
 	   mangle->target != ARPT_CONTINUE)
-		return 0;
-	return 1;
+		return false;
+	return true;
 }
 
 static struct arpt_target arpt_mangle_reg = {
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 40e2734..e82339a 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -220,17 +220,17 @@ clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
 	return 0;
 }
 
-static int
+static bool
 clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
 {
 	if (nodenum == 0 ||
 	    nodenum > c->num_total_nodes)
-		return 1;
+		return true;
 
 	if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 #endif
 
@@ -370,7 +370,7 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *e_void,
 	   const struct xt_target *target,
@@ -387,13 +387,13 @@ checkentry(const char *tablename,
 	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
 		printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n",
 			cipinfo->hash_mode);
-		return 0;
+		return false;
 
 	}
 	if (e->ip.dmsk.s_addr != htonl(0xffffffff)
 	    || e->ip.dst.s_addr == 0) {
 		printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n");
-		return 0;
+		return false;
 	}
 
 	/* FIXME: further sanity checks */
@@ -407,7 +407,7 @@ checkentry(const char *tablename,
 			if (cipinfo->config != config) {
 				printk(KERN_ERR "CLUSTERIP: Reloaded entry "
 				       "has invalid config pointer!\n");
-				return 0;
+				return false;
 			}
 		} else {
 			/* Case B: This is a new rule referring to an existing
@@ -418,19 +418,19 @@ checkentry(const char *tablename,
 		/* Case C: This is a completely new clusterip config */
 		if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
 			printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
-			return 0;
+			return false;
 		} else {
 			struct net_device *dev;
 
 			if (e->ip.iniface[0] == '\0') {
 				printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n");
-				return 0;
+				return false;
 			}
 
 			dev = dev_get_by_name(e->ip.iniface);
 			if (!dev) {
 				printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
-				return 0;
+				return false;
 			}
 
 			config = clusterip_config_init(cipinfo,
@@ -438,7 +438,7 @@ checkentry(const char *tablename,
 			if (!config) {
 				printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n");
 				dev_put(dev);
-				return 0;
+				return false;
 			}
 			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
 		}
@@ -448,10 +448,10 @@ checkentry(const char *tablename,
 	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
 				    "proto=%d\n", target->family);
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /* drop reference count of cluster config when rule is deleted */
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 918ca92..0236701 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -24,8 +24,8 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("iptables ECN modification module");
 
 /* set ECT codepoint from IP header.
- * 	return 0 if there was an error. */
-static inline int
+ * 	return false if there was an error. */
+static inline bool
 set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
 	struct iphdr *iph = ip_hdr(*pskb);
@@ -33,18 +33,18 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 	if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
 		__u8 oldtos;
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
-			return 0;
+			return false;
 		iph = ip_hdr(*pskb);
 		oldtos = iph->tos;
 		iph->tos &= ~IPT_ECN_IP_MASK;
 		iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
 		nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
 	}
-	return 1;
+	return true;
 }
 
-/* Return 0 if there was an error. */
-static inline int
+/* Return false if there was an error. */
+static inline bool
 set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
 	struct tcphdr _tcph, *tcph;
@@ -54,16 +54,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 	tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 				  sizeof(_tcph), &_tcph);
 	if (!tcph)
-		return 0;
+		return false;
 
 	if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) ||
 	     tcph->ece == einfo->proto.tcp.ece) &&
 	    ((!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
 	     tcph->cwr == einfo->proto.tcp.cwr)))
-		return 1;
+		return true;
 
 	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
-		return 0;
+		return false;
 	tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
 
 	oldval = ((__be16 *)tcph)[6];
@@ -74,7 +74,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 
 	nf_proto_csum_replace2(&tcph->check, *pskb,
 				oldval, ((__be16 *)tcph)[6], 0);
-	return 1;
+	return true;
 }
 
 static unsigned int
@@ -99,7 +99,7 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *e_void,
 	   const struct xt_target *target,
@@ -112,20 +112,20 @@ checkentry(const char *tablename,
 	if (einfo->operation & IPT_ECN_OP_MASK) {
 		printk(KERN_WARNING "ECN: unsupported ECN operation %x\n",
 			einfo->operation);
-		return 0;
+		return false;
 	}
 	if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
 		printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n",
 			einfo->ip_ect);
-		return 0;
+		return false;
 	}
 	if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR))
 	    && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
 		printk(KERN_WARNING "ECN: cannot use TCP operations on a "
 		       "non-tcp rule\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_target ipt_ecn_reg = {
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index a42c5cd..bbff6c3 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -435,24 +435,24 @@ ipt_log_target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static int ipt_log_checkentry(const char *tablename,
-			      const void *e,
-			      const struct xt_target *target,
-			      void *targinfo,
-			      unsigned int hook_mask)
+static bool ipt_log_checkentry(const char *tablename,
+			       const void *e,
+			       const struct xt_target *target,
+			       void *targinfo,
+			       unsigned int hook_mask)
 {
 	const struct ipt_log_info *loginfo = targinfo;
 
 	if (loginfo->level >= 8) {
 		DEBUGP("LOG: level %u >= 8\n", loginfo->level);
-		return 0;
+		return false;
 	}
 	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
 		DEBUGP("LOG: prefix term %i\n",
 		       loginfo->prefix[sizeof(loginfo->prefix)-1]);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_target ipt_log_reg = {
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d4f2d77..b5b2164 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -37,7 +37,7 @@ MODULE_DESCRIPTION("iptables MASQUERADE target module");
 static DEFINE_RWLOCK(masq_lock);
 
 /* FIXME: Multiple targets. --RR */
-static int
+static bool
 masquerade_check(const char *tablename,
 		 const void *e,
 		 const struct xt_target *target,
@@ -48,13 +48,13 @@ masquerade_check(const char *tablename,
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		DEBUGP("masquerade_check: bad MAP_IPS.\n");
-		return 0;
+		return false;
 	}
 	if (mr->rangesize != 1) {
 		DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static unsigned int
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 068c69b..a902c71 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -29,7 +29,7 @@ MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
 #define DEBUGP(format, args...)
 #endif
 
-static int
+static bool
 check(const char *tablename,
       const void *e,
       const struct xt_target *target,
@@ -40,13 +40,13 @@ check(const char *tablename,
 
 	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
 		DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
-		return 0;
+		return false;
 	}
 	if (mr->rangesize != 1) {
 		DEBUGP(MODULENAME":check: bad rangesize %u.\n", mr->rangesize);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static unsigned int
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 68cc76a..2a04103 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -32,7 +32,7 @@ MODULE_DESCRIPTION("iptables REDIRECT target module");
 #endif
 
 /* FIXME: Take multiple ranges --RR */
-static int
+static bool
 redirect_check(const char *tablename,
 	       const void *e,
 	       const struct xt_target *target,
@@ -43,13 +43,13 @@ redirect_check(const char *tablename,
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		DEBUGP("redirect_check: bad MAP_IPS.\n");
-		return 0;
+		return false;
 	}
 	if (mr->rangesize != 1) {
 		DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static unsigned int
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 9041e07..5c3270d 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -217,27 +217,27 @@ static unsigned int reject(struct sk_buff **pskb,
 	return NF_DROP;
 }
 
-static int check(const char *tablename,
-		 const void *e_void,
-		 const struct xt_target *target,
-		 void *targinfo,
-		 unsigned int hook_mask)
+static bool check(const char *tablename,
+		  const void *e_void,
+		  const struct xt_target *target,
+		  void *targinfo,
+		  unsigned int hook_mask)
 {
 	const struct ipt_reject_info *rejinfo = targinfo;
 	const struct ipt_entry *e = e_void;
 
 	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
 		printk("REJECT: ECHOREPLY no longer supported.\n");
-		return 0;
+		return false;
 	} else if (rejinfo->with == IPT_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ip.proto != IPPROTO_TCP
 		    || (e->ip.invflags & XT_INV_PROTO)) {
 			DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n");
-			return 0;
+			return false;
 		}
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_target ipt_reject_reg = {
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 511e5ff..3649fab 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -33,7 +33,7 @@ MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
 #define DEBUGP(format, args...)
 #endif
 
-static int
+static bool
 same_check(const char *tablename,
 	      const void *e,
 	      const struct xt_target *target,
@@ -47,13 +47,13 @@ same_check(const char *tablename,
 
 	if (mr->rangesize < 1) {
 		DEBUGP("same_check: need at least one dest range.\n");
-		return 0;
+		return false;
 	}
 	if (mr->rangesize > IPT_SAME_MAX_RANGE) {
 		DEBUGP("same_check: too many ranges specified, maximum "
 				"is %u ranges\n",
 				IPT_SAME_MAX_RANGE);
-		return 0;
+		return false;
 	}
 	for (count = 0; count < mr->rangesize; count++) {
 		if (ntohl(mr->range[count].min_ip) >
@@ -62,11 +62,11 @@ same_check(const char *tablename,
 				"range `%u.%u.%u.%u-%u.%u.%u.%u'.\n",
 				NIPQUAD(mr->range[count].min_ip),
 				NIPQUAD(mr->range[count].max_ip));
-			return 0;
+			return false;
 		}
 		if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) {
 			DEBUGP("same_check: bad MAP_IPS.\n");
-			return 0;
+			return false;
 		}
 		rangeip = (ntohl(mr->range[count].max_ip) -
 					ntohl(mr->range[count].min_ip) + 1);
@@ -81,7 +81,7 @@ same_check(const char *tablename,
 		DEBUGP("same_check: Couldn't allocate %u bytes "
 			"for %u ipaddresses!\n",
 			(sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
-		return 0;
+		return false;
 	}
 	DEBUGP("same_check: Allocated %u bytes for %u ipaddresses.\n",
 			(sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
@@ -97,7 +97,7 @@ same_check(const char *tablename,
 			index++;
 		}
 	}
-	return 1;
+	return true;
 }
 
 static void
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 0ad02f2..ac43e86 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -43,7 +43,7 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *e_void,
 	   const struct xt_target *target,
@@ -58,9 +58,9 @@ checkentry(const char *tablename,
 	    && tos != IPTOS_MINCOST
 	    && tos != IPTOS_NORMALSVC) {
 		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_target ipt_tos_reg = {
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index a991ec7..96b6e35 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -62,7 +62,7 @@ ipt_ttl_target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static int ipt_ttl_checkentry(const char *tablename,
+static bool ipt_ttl_checkentry(const char *tablename,
 		const void *e,
 		const struct xt_target *target,
 		void *targinfo,
@@ -73,11 +73,11 @@ static int ipt_ttl_checkentry(const char *tablename,
 	if (info->mode > IPT_TTL_MAXMODE) {
 		printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
 			info->mode);
-		return 0;
+		return false;
 	}
 	if ((info->mode != IPT_TTL_SET) && (info->ttl == 0))
-		return 0;
-	return 1;
+		return false;
+	return true;
 }
 
 static struct xt_target ipt_TTL = {
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 23b607b..dfa7afd 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -328,25 +328,25 @@ static void ipt_logfn(unsigned int pf,
 	ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
 }
 
-static int ipt_ulog_checkentry(const char *tablename,
-			       const void *e,
-			       const struct xt_target *target,
-			       void *targinfo,
-			       unsigned int hookmask)
+static bool ipt_ulog_checkentry(const char *tablename,
+				const void *e,
+				const struct xt_target *target,
+				void *targinfo,
+				unsigned int hookmask)
 {
 	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
 
 	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
 		DEBUGP("ipt_ULOG: prefix term %i\n",
 		       loginfo->prefix[sizeof(loginfo->prefix) - 1]);
-		return 0;
+		return false;
 	}
 	if (loginfo->qthreshold > ULOG_MAX_QLEN) {
 		DEBUGP("ipt_ULOG: queue threshold %i > MAX_QLEN\n",
 			loginfo->qthreshold);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 6740736..fc3d943 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -140,36 +140,36 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
 }
 
-static int ipt_snat_checkentry(const char *tablename,
-			       const void *entry,
-			       const struct xt_target *target,
-			       void *targinfo,
-			       unsigned int hook_mask)
+static bool ipt_snat_checkentry(const char *tablename,
+				const void *entry,
+				const struct xt_target *target,
+				void *targinfo,
+				unsigned int hook_mask)
 {
 	struct nf_nat_multi_range_compat *mr = targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
 		printk("SNAT: multiple ranges no longer supported\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
-static int ipt_dnat_checkentry(const char *tablename,
-			       const void *entry,
-			       const struct xt_target *target,
-			       void *targinfo,
-			       unsigned int hook_mask)
+static bool ipt_dnat_checkentry(const char *tablename,
+				const void *entry,
+				const struct xt_target *target,
+				void *targinfo,
+				unsigned int hook_mask)
 {
 	struct nf_nat_multi_range_compat *mr = targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
 		printk("DNAT: multiple ranges no longer supported\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 inline unsigned int
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 4115a57..82966c0 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -58,7 +58,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static int ip6t_hl_checkentry(const char *tablename,
+static bool ip6t_hl_checkentry(const char *tablename,
 		const void *entry,
 		const struct xt_target *target,
 		void *targinfo,
@@ -69,14 +69,14 @@ static int ip6t_hl_checkentry(const char *tablename,
 	if (info->mode > IP6T_HL_MAXMODE) {
 		printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n",
 			info->mode);
-		return 0;
+		return false;
 	}
 	if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) {
 		printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't "
 			"make sense with value 0\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_target ip6t_HL = {
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 5bb9cd3..aa4b9a1 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -448,24 +448,24 @@ ip6t_log_target(struct sk_buff **pskb,
 }
 
 
-static int ip6t_log_checkentry(const char *tablename,
-			       const void *entry,
-			       const struct xt_target *target,
-			       void *targinfo,
-			       unsigned int hook_mask)
+static bool ip6t_log_checkentry(const char *tablename,
+				const void *entry,
+				const struct xt_target *target,
+				void *targinfo,
+				unsigned int hook_mask)
 {
 	const struct ip6t_log_info *loginfo = targinfo;
 
 	if (loginfo->level >= 8) {
 		DEBUGP("LOG: level %u >= 8\n", loginfo->level);
-		return 0;
+		return false;
 	}
 	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
 		DEBUGP("LOG: prefix term %i\n",
 		       loginfo->prefix[sizeof(loginfo->prefix)-1]);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_target ip6t_log_reg = {
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index cb3d241..8639a059 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -221,27 +221,27 @@ static unsigned int reject6_target(struct sk_buff **pskb,
 	return NF_DROP;
 }
 
-static int check(const char *tablename,
-		 const void *entry,
-		 const struct xt_target *target,
-		 void *targinfo,
-		 unsigned int hook_mask)
+static bool check(const char *tablename,
+		  const void *entry,
+		  const struct xt_target *target,
+		  void *targinfo,
+		  unsigned int hook_mask)
 {
 	const struct ip6t_reject_info *rejinfo = targinfo;
 	const struct ip6t_entry *e = entry;
 
 	if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
 		printk("ip6t_REJECT: ECHOREPLY is not supported.\n");
-		return 0;
+		return false;
 	} else if (rejinfo->with == IP6T_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ipv6.proto != IPPROTO_TCP
 		    || (e->ipv6.invflags & XT_INV_PROTO)) {
 			DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n");
-			return 0;
+			return false;
 		}
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_target ip6t_reject_reg = {
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index b03ce00..4e8aa1b 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -76,7 +76,7 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static int
+static bool
 checkentry(const char *tablename,
 	   const void *entry,
 	   const struct xt_target *target,
@@ -88,21 +88,21 @@ checkentry(const char *tablename,
 	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
 				    "proto=%d\n", target->family);
-		return 0;
+		return false;
 	}
 	if (matchinfo->mode == XT_CONNMARK_RESTORE) {
 		if (strcmp(tablename, "mangle") != 0) {
 			printk(KERN_WARNING "CONNMARK: restore can only be "
 			       "called from \"mangle\" table, not \"%s\"\n",
 			       tablename);
-			return 0;
+			return false;
 		}
 	}
 	if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
 		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static void
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 81c0c58..ab2f0d0 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -85,16 +85,16 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static int checkentry(const char *tablename, const void *entry,
-		      const struct xt_target *target, void *targinfo,
-		      unsigned int hook_mask)
+static bool checkentry(const char *tablename, const void *entry,
+		       const struct xt_target *target, void *targinfo,
+		       unsigned int hook_mask)
 {
 	struct xt_connsecmark_target_info *info = targinfo;
 
 	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
 				    "proto=%d\n", target->family);
-		return 0;
+		return false;
 	}
 	switch (info->mode) {
 	case CONNSECMARK_SAVE:
@@ -103,10 +103,10 @@ static int checkentry(const char *tablename, const void *entry,
 
 	default:
 		printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode);
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static void
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 9f2f220..2d779f6 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -66,19 +66,19 @@ static unsigned int target6(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static int checkentry(const char *tablename,
-		      const void *e_void,
-		      const struct xt_target *target,
-		      void *targinfo,
-		      unsigned int hook_mask)
+static bool checkentry(const char *tablename,
+		       const void *e_void,
+		       const struct xt_target *target,
+		       void *targinfo,
+		       unsigned int hook_mask)
 {
 	const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
 
 	if ((dscp > XT_DSCP_MAX)) {
 		printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static struct xt_target xt_dscp_target[] = {
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 4381780..bd9cdf2 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -65,7 +65,7 @@ target_v1(struct sk_buff **pskb,
 }
 
 
-static int
+static bool
 checkentry_v0(const char *tablename,
 	      const void *entry,
 	      const struct xt_target *target,
@@ -76,12 +76,12 @@ checkentry_v0(const char *tablename,
 
 	if (markinfo->mark > 0xffffffff) {
 		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
-static int
+static bool
 checkentry_v1(const char *tablename,
 	      const void *entry,
 	      const struct xt_target *target,
@@ -95,13 +95,13 @@ checkentry_v1(const char *tablename,
 	    && markinfo->mode != XT_MARK_OR) {
 		printk(KERN_WARNING "MARK: unknown mode %u\n",
 		       markinfo->mode);
-		return 0;
+		return false;
 	}
 	if (markinfo->mark > 0xffffffff) {
 		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 901ed7a..0c6f283 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -38,7 +38,7 @@ nflog_target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static int
+static bool
 nflog_checkentry(const char *tablename, const void *entry,
 		 const struct xt_target *target, void *targetinfo,
 		 unsigned int hookmask)
@@ -46,10 +46,10 @@ nflog_checkentry(const char *tablename, const void *entry,
 	struct xt_nflog_info *info = targetinfo;
 
 	if (info->flags & ~XT_NFLOG_MASK)
-		return 0;
+		return false;
 	if (info->prefix[sizeof(info->prefix) - 1] != '\0')
-		return 0;
-	return 1;
+		return false;
+	return true;
 }
 
 static struct xt_target xt_nflog_target[] = {
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 705f0e8..f3e78c5 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -51,7 +51,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static int checkentry_selinux(struct xt_secmark_target_info *info)
+static bool checkentry_selinux(struct xt_secmark_target_info *info)
 {
 	int err;
 	struct xt_secmark_target_selinux_info *sel = &info->u.sel;
@@ -63,50 +63,50 @@ static int checkentry_selinux(struct xt_secmark_target_info *info)
 		if (err == -EINVAL)
 			printk(KERN_INFO PFX "invalid SELinux context \'%s\'\n",
 			       sel->selctx);
-		return 0;
+		return false;
 	}
 
 	if (!sel->selsid) {
 		printk(KERN_INFO PFX "unable to map SELinux context \'%s\'\n",
 		       sel->selctx);
-		return 0;
+		return false;
 	}
 
 	err = selinux_relabel_packet_permission(sel->selsid);
 	if (err) {
 		printk(KERN_INFO PFX "unable to obtain relabeling permission\n");
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
-static int checkentry(const char *tablename, const void *entry,
-		      const struct xt_target *target, void *targinfo,
-		      unsigned int hook_mask)
+static bool checkentry(const char *tablename, const void *entry,
+		       const struct xt_target *target, void *targinfo,
+		       unsigned int hook_mask)
 {
 	struct xt_secmark_target_info *info = targinfo;
 
 	if (mode && mode != info->mode) {
 		printk(KERN_INFO PFX "mode already set to %hu cannot mix with "
 		       "rules for mode %hu\n", mode, info->mode);
-		return 0;
+		return false;
 	}
 
 	switch (info->mode) {
 	case SECMARK_MODE_SEL:
 		if (!checkentry_selinux(info))
-			return 0;
+			return false;
 		break;
 
 	default:
 		printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode);
-		return 0;
+		return false;
 	}
 
 	if (!mode)
 		mode = info->mode;
-	return 1;
+	return true;
 }
 
 static struct xt_target xt_secmark_target[] = {
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 15fe8f6..075051a 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -197,19 +197,19 @@ xt_tcpmss_target6(struct sk_buff **pskb,
 #define TH_SYN 0x02
 
 /* Must specify -p tcp --syn */
-static inline int find_syn_match(const struct xt_entry_match *m)
+static inline bool find_syn_match(const struct xt_entry_match *m)
 {
 	const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
 
 	if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
 	    tcpinfo->flg_cmp & TH_SYN &&
 	    !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
-		return 1;
+		return true;
 
-	return 0;
+	return false;
 }
 
-static int
+static bool
 xt_tcpmss_checkentry4(const char *tablename,
 		      const void *entry,
 		      const struct xt_target *target,
@@ -225,16 +225,16 @@ xt_tcpmss_checkentry4(const char *tablename,
 			   (1 << NF_IP_POST_ROUTING))) != 0) {
 		printk("xt_TCPMSS: path-MTU clamping only supported in "
 		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
-		return 0;
+		return false;
 	}
 	if (IPT_MATCH_ITERATE(e, find_syn_match))
-		return 1;
+		return true;
 	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
-	return 0;
+	return false;
 }
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-static int
+static bool
 xt_tcpmss_checkentry6(const char *tablename,
 		      const void *entry,
 		      const struct xt_target *target,
@@ -250,12 +250,12 @@ xt_tcpmss_checkentry6(const char *tablename,
 			   (1 << NF_IP6_POST_ROUTING))) != 0) {
 		printk("xt_TCPMSS: path-MTU clamping only supported in "
 		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
-		return 0;
+		return false;
 	}
 	if (IP6T_MATCH_ITERATE(e, find_syn_match))
-		return 1;
+		return true;
 	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
-	return 0;
+	return false;
 }
 #endif
 
-- 
cgit v0.10.2


From a47362a226456d8db8207e618324a2278d05d3a7 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:16:55 -0700
Subject: [NETFILTER]: add some consts, remove some casts

Make a number of variables const and/or remove unneeded casts.

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index e82339a..2de7ae0 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -235,12 +235,13 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
 #endif
 
 static inline u_int32_t
-clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
+clusterip_hashfn(const struct sk_buff *skb,
+		 const struct clusterip_config *config)
 {
-	struct iphdr *iph = ip_hdr(skb);
+	const struct iphdr *iph = ip_hdr(skb);
 	unsigned long hashval;
 	u_int16_t sport, dport;
-	u_int16_t *ports;
+	const u_int16_t *ports;
 
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
@@ -249,7 +250,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
 	case IPPROTO_SCTP:
 	case IPPROTO_DCCP:
 	case IPPROTO_ICMP:
-		ports = (void *)iph+iph->ihl*4;
+		ports = (const void *)iph+iph->ihl*4;
 		sport = ports[0];
 		dport = ports[1];
 		break;
@@ -289,7 +290,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
 }
 
 static inline int
-clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
+clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
 {
 	return test_bit(hash - 1, &config->local_nodes);
 }
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index bbff6c3..bcc43a6 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -41,7 +41,8 @@ static void dump_packet(const struct nf_loginfo *info,
 			const struct sk_buff *skb,
 			unsigned int iphoff)
 {
-	struct iphdr _iph, *ih;
+	struct iphdr _iph;
+	const struct iphdr *ih;
 	unsigned int logflags;
 
 	if (info->type == NF_LOG_TYPE_LOG)
@@ -100,7 +101,8 @@ static void dump_packet(const struct nf_loginfo *info,
 
 	switch (ih->protocol) {
 	case IPPROTO_TCP: {
-		struct tcphdr _tcph, *th;
+		struct tcphdr _tcph;
+		const struct tcphdr *th;
 
 		/* Max length: 10 "PROTO=TCP " */
 		printk("PROTO=TCP ");
@@ -151,7 +153,7 @@ static void dump_packet(const struct nf_loginfo *info,
 		if ((logflags & IPT_LOG_TCPOPT)
 		    && th->doff * 4 > sizeof(struct tcphdr)) {
 			unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
-			unsigned char *op;
+			const unsigned char *op;
 			unsigned int i, optsize;
 
 			optsize = th->doff * 4 - sizeof(struct tcphdr);
@@ -173,7 +175,8 @@ static void dump_packet(const struct nf_loginfo *info,
 	}
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE: {
-		struct udphdr _udph, *uh;
+		struct udphdr _udph;
+		const struct udphdr *uh;
 
 		if (ih->protocol == IPPROTO_UDP)
 			/* Max length: 10 "PROTO=UDP "     */
@@ -200,7 +203,8 @@ static void dump_packet(const struct nf_loginfo *info,
 		break;
 	}
 	case IPPROTO_ICMP: {
-		struct icmphdr _icmph, *ich;
+		struct icmphdr _icmph;
+		const struct icmphdr *ich;
 		static const size_t required_len[NR_ICMP_TYPES+1]
 			= { [ICMP_ECHOREPLY] = 4,
 			    [ICMP_DEST_UNREACH]
@@ -285,7 +289,8 @@ static void dump_packet(const struct nf_loginfo *info,
 	}
 	/* Max Length */
 	case IPPROTO_AH: {
-		struct ip_auth_hdr _ahdr, *ah;
+		struct ip_auth_hdr _ahdr;
+		const struct ip_auth_hdr *ah;
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -307,7 +312,8 @@ static void dump_packet(const struct nf_loginfo *info,
 		break;
 	}
 	case IPPROTO_ESP: {
-		struct ip_esp_hdr _esph, *eh;
+		struct ip_esp_hdr _esph;
+		const struct ip_esp_hdr *eh;
 
 		/* Max length: 10 "PROTO=ESP " */
 		printk("PROTO=ESP ");
@@ -385,11 +391,13 @@ ipt_log_packet(unsigned int pf,
 	       out ? out->name : "");
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge) {
-		struct net_device *physindev = skb->nf_bridge->physindev;
-		struct net_device *physoutdev = skb->nf_bridge->physoutdev;
+		const struct net_device *physindev;
+		const struct net_device *physoutdev;
 
+		physindev = skb->nf_bridge->physindev;
 		if (physindev && in != physindev)
 			printk("PHYSIN=%s ", physindev->name);
+		physoutdev = skb->nf_bridge->physoutdev;
 		if (physoutdev && out != physoutdev)
 			printk("PHYSOUT=%s ", physoutdev->name);
 	}
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index b5b2164..846a0e7 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -70,7 +70,7 @@ masquerade_target(struct sk_buff **pskb,
 	enum ip_conntrack_info ctinfo;
 	struct nf_nat_range newrange;
 	const struct nf_nat_multi_range_compat *mr;
-	struct rtable *rt;
+	const struct rtable *rt;
 	__be32 newsrc;
 
 	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
@@ -112,7 +112,7 @@ masquerade_target(struct sk_buff **pskb,
 static inline int
 device_cmp(struct nf_conn *i, void *ifindex)
 {
-	struct nf_conn_nat *nat = nfct_nat(i);
+	const struct nf_conn_nat *nat = nfct_nat(i);
 	int ret;
 
 	if (!nat)
@@ -129,7 +129,7 @@ static int masq_device_event(struct notifier_block *this,
 			     unsigned long event,
 			     void *ptr)
 {
-	struct net_device *dev = ptr;
+	const struct net_device *dev = ptr;
 
 	if (event == NETDEV_DOWN) {
 		/* Device was downed.  Search entire table for
@@ -147,7 +147,7 @@ static int masq_inet_event(struct notifier_block *this,
 			   unsigned long event,
 			   void *ptr)
 {
-	struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+	const struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
 
 	if (event == NETDEV_DOWN) {
 		/* IP address was deleted.  Search entire table for
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 5c3270d..90f7b70 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -122,7 +122,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	tcph->check = 0;
 	tcph->check = tcp_v4_check(sizeof(struct tcphdr),
 				   niph->saddr, niph->daddr,
-				   csum_partial((char *)tcph,
+				   csum_partial(tcph,
 						sizeof(struct tcphdr), 0));
 
 	/* Set DF, id = 0 */
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 96b6e35..f53f2c4 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -68,7 +68,7 @@ static bool ipt_ttl_checkentry(const char *tablename,
 		void *targinfo,
 		unsigned int hook_mask)
 {
-	struct ipt_TTL_info *info = targinfo;
+	const struct ipt_TTL_info *info = targinfo;
 
 	if (info->mode > IPT_TTL_MAXMODE) {
 		printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index dfa7afd..282eb00 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -334,7 +334,7 @@ static bool ipt_ulog_checkentry(const char *tablename,
 				void *targinfo,
 				unsigned int hookmask)
 {
-	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
+	const struct ipt_ulog_info *loginfo = targinfo;
 
 	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
 		DEBUGP("ipt_ULOG: prefix term %i\n",
@@ -359,7 +359,7 @@ struct compat_ipt_ulog_info {
 
 static void compat_from_user(void *dst, void *src)
 {
-	struct compat_ipt_ulog_info *cl = src;
+	const struct compat_ipt_ulog_info *cl = src;
 	struct ipt_ulog_info l = {
 		.nl_group	= cl->nl_group,
 		.copy_range	= cl->copy_range,
@@ -372,7 +372,7 @@ static void compat_from_user(void *dst, void *src)
 
 static int compat_to_user(void __user *dst, void *src)
 {
-	struct ipt_ulog_info *l = src;
+	const struct ipt_ulog_info *l = src;
 	struct compat_ipt_ulog_info cl = {
 		.nl_group	= l->nl_group,
 		.copy_range	= l->copy_range,
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 6b5b7c9..49d503c 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -46,7 +46,8 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       bool *hotdrop)
 {
-	struct ip_auth_hdr _ahdr, *ah;
+	struct ip_auth_hdr _ahdr;
+	const struct ip_auth_hdr *ah;
 	const struct ipt_ah *ahinfo = matchinfo;
 
 	/* Must not be a fragment. */
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index ba4f549..3129e31 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -32,7 +32,8 @@ static inline bool match_tcp(const struct sk_buff *skb,
 			     const struct ipt_ecn_info *einfo,
 			     bool *hotdrop)
 {
-	struct tcphdr _tcph, *th;
+	struct tcphdr _tcph;
+	const struct tcphdr *th;
 
 	/* In practice, TCP match does this, so can't fail.  But let's
 	 * be good citizens.
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index d632e0e..d03e6a6 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -323,7 +323,7 @@ struct recent_iter_state {
 static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct recent_iter_state *st = seq->private;
-	struct recent_table *t = st->table;
+	const struct recent_table *t = st->table;
 	struct recent_entry *e;
 	loff_t p = *pos;
 
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index b1aa598..ef0a99e 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -190,7 +190,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 			tcph->check = 0;
 			tcph->check = tcp_v4_check(datalen,
 						   iph->saddr, iph->daddr,
-						   csum_partial((char *)tcph,
+						   csum_partial(tcph,
 								datalen, 0));
 		}
 	} else
@@ -278,7 +278,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 			udph->check = 0;
 			udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 							datalen, IPPROTO_UDP,
-							csum_partial((char *)udph,
+							csum_partial(udph,
 								     datalen, 0));
 			if (!udph->check)
 				udph->check = CSUM_MANGLED_0;
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 82966c0..20047ff 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -64,7 +64,7 @@ static bool ip6t_hl_checkentry(const char *tablename,
 		void *targinfo,
 		unsigned int hook_mask)
 {
-	struct ip6t_HL_info *info = targinfo;
+	const struct ip6t_HL_info *info = targinfo;
 
 	if (info->mode > IP6T_HL_MAXMODE) {
 		printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n",
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index aa4b9a1..996168d 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -48,7 +48,8 @@ static void dump_packet(const struct nf_loginfo *info,
 {
 	u_int8_t currenthdr;
 	int fragment;
-	struct ipv6hdr _ip6h, *ih;
+	struct ipv6hdr _ip6h;
+	const struct ipv6hdr *ih;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
 	unsigned int logflags;
@@ -78,7 +79,8 @@ static void dump_packet(const struct nf_loginfo *info,
 	ptr = ip6hoff + sizeof(struct ipv6hdr);
 	currenthdr = ih->nexthdr;
 	while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
-		struct ipv6_opt_hdr _hdr, *hp;
+		struct ipv6_opt_hdr _hdr;
+		const struct ipv6_opt_hdr *hp;
 
 		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
 		if (hp == NULL) {
@@ -92,7 +94,8 @@ static void dump_packet(const struct nf_loginfo *info,
 
 		switch (currenthdr) {
 		case IPPROTO_FRAGMENT: {
-			struct frag_hdr _fhdr, *fh;
+			struct frag_hdr _fhdr;
+			const struct frag_hdr *fh;
 
 			printk("FRAG:");
 			fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
@@ -131,7 +134,8 @@ static void dump_packet(const struct nf_loginfo *info,
 		/* Max Length */
 		case IPPROTO_AH:
 			if (logflags & IP6T_LOG_IPOPT) {
-				struct ip_auth_hdr _ahdr, *ah;
+				struct ip_auth_hdr _ahdr;
+				const struct ip_auth_hdr *ah;
 
 				/* Max length: 3 "AH " */
 				printk("AH ");
@@ -162,7 +166,8 @@ static void dump_packet(const struct nf_loginfo *info,
 			break;
 		case IPPROTO_ESP:
 			if (logflags & IP6T_LOG_IPOPT) {
-				struct ip_esp_hdr _esph, *eh;
+				struct ip_esp_hdr _esph;
+				const struct ip_esp_hdr *eh;
 
 				/* Max length: 4 "ESP " */
 				printk("ESP ");
@@ -202,7 +207,8 @@ static void dump_packet(const struct nf_loginfo *info,
 
 	switch (currenthdr) {
 	case IPPROTO_TCP: {
-		struct tcphdr _tcph, *th;
+		struct tcphdr _tcph;
+		const struct tcphdr *th;
 
 		/* Max length: 10 "PROTO=TCP " */
 		printk("PROTO=TCP ");
@@ -250,7 +256,8 @@ static void dump_packet(const struct nf_loginfo *info,
 
 		if ((logflags & IP6T_LOG_TCPOPT)
 		    && th->doff * 4 > sizeof(struct tcphdr)) {
-			u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
+			u_int8_t _opt[60 - sizeof(struct tcphdr)];
+			const u_int8_t *op;
 			unsigned int i;
 			unsigned int optsize = th->doff * 4
 					       - sizeof(struct tcphdr);
@@ -273,7 +280,8 @@ static void dump_packet(const struct nf_loginfo *info,
 	}
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE: {
-		struct udphdr _udph, *uh;
+		struct udphdr _udph;
+		const struct udphdr *uh;
 
 		if (currenthdr == IPPROTO_UDP)
 			/* Max length: 10 "PROTO=UDP "     */
@@ -298,7 +306,8 @@ static void dump_packet(const struct nf_loginfo *info,
 		break;
 	}
 	case IPPROTO_ICMPV6: {
-		struct icmp6hdr _icmp6h, *ic;
+		struct icmp6hdr _icmp6h;
+		const struct icmp6hdr *ic;
 
 		/* Max length: 13 "PROTO=ICMPv6 " */
 		printk("PROTO=ICMPv6 ");
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 8639a059..4df07f0 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -159,7 +159,7 @@ static void send_reset(struct sk_buff *oldskb)
 	tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
 				      &ipv6_hdr(nskb)->daddr,
 				      sizeof(struct tcphdr), IPPROTO_TCP,
-				      csum_partial((char *)tcph,
+				      csum_partial(tcph,
 						   sizeof(struct tcphdr), 0));
 
 	nf_ct_attach(nskb, oldskb);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 8fc00bd..b4b1d28 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -51,7 +51,8 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       bool *hotdrop)
 {
-	struct ip_auth_hdr *ah, _ah;
+	struct ip_auth_hdr _ah;
+	const struct ip_auth_hdr *ah;
 	const struct ip6t_ah *ahinfo = matchinfo;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index f0aed89..e0e416b 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -50,7 +50,8 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       bool *hotdrop)
 {
-	struct frag_hdr _frag, *fh;
+	struct frag_hdr _frag;
+	const struct frag_hdr *fh;
 	const struct ip6t_frag *fraginfo = matchinfo;
 	unsigned int ptr;
 	int err;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 6fdd797..bbd2615 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -57,14 +57,17 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       bool *hotdrop)
 {
-	struct ipv6_opt_hdr _optsh, *oh;
+	struct ipv6_opt_hdr _optsh;
+	const struct ipv6_opt_hdr *oh;
 	const struct ip6t_opts *optinfo = matchinfo;
 	unsigned int temp;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
 	bool ret = false;
-	u8 _opttype, *tp = NULL;
-	u8 _optlen, *lp = NULL;
+	u8 _opttype;
+	u8 _optlen;
+	const u_int8_t *tp = NULL;
+	const u_int8_t *lp = NULL;
 	unsigned int optlen;
 	int err;
 
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index a3008b4..e94fdd8 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -47,7 +47,8 @@ match(const struct sk_buff *skb,
 	 unsigned int protoff,
 	 bool *hotdrop)
 {
-	struct ip6_mh _mh, *mh;
+	struct ip6_mh _mh;
+	const struct ip6_mh *mh;
 	const struct ip6t_mh *mhinfo = matchinfo;
 
 	/* Must not be a fragment. */
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index e991ed4..bc5ff4b 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -52,13 +52,15 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       bool *hotdrop)
 {
-	struct ipv6_rt_hdr _route, *rh;
+	struct ipv6_rt_hdr _route;
+	const struct ipv6_rt_hdr *rh;
 	const struct ip6t_rt *rtinfo = matchinfo;
 	unsigned int temp;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
 	bool ret = false;
-	struct in6_addr *ap, _addr;
+	struct in6_addr _addr;
+	const struct in6_addr *ap;
 	int err;
 
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
@@ -100,9 +102,9 @@ match(const struct sk_buff *skb,
 		 !!(rtinfo->invflags & IP6T_RT_INV_LEN))));
 	DEBUGP("res %02X %02X %02X ",
 	       (rtinfo->flags & IP6T_RT_RES),
-	       ((struct rt0_hdr *)rh)->reserved,
+	       ((const struct rt0_hdr *)rh)->reserved,
 	       !((rtinfo->flags & IP6T_RT_RES) &&
-		 (((struct rt0_hdr *)rh)->reserved)));
+		 (((const struct rt0_hdr *)rh)->reserved)));
 
 	ret = (rh != NULL)
 	      &&
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 3aaabec..381a77c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -231,13 +231,13 @@ void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 {
 	__be32 diff[] = { ~from, to };
 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		*sum = csum_fold(csum_partial((char *)diff, sizeof(diff),
+		*sum = csum_fold(csum_partial(diff, sizeof(diff),
 				~csum_unfold(*sum)));
 		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
-			skb->csum = ~csum_partial((char *)diff, sizeof(diff),
+			skb->csum = ~csum_partial(diff, sizeof(diff),
 						~skb->csum);
 	} else if (pseudohdr)
-		*sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff),
+		*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
 				csum_unfold(*sum)));
 }
 EXPORT_SYMBOL(nf_proto_csum_replace4);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 4e8aa1b..4284a59 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -83,7 +83,7 @@ checkentry(const char *tablename,
 	   void *targinfo,
 	   unsigned int hook_mask)
 {
-	struct xt_connmark_target_info *matchinfo = targinfo;
+	const struct xt_connmark_target_info *matchinfo = targinfo;
 
 	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
@@ -121,7 +121,7 @@ struct compat_xt_connmark_target_info {
 
 static void compat_from_user(void *dst, void *src)
 {
-	struct compat_xt_connmark_target_info *cm = src;
+	const struct compat_xt_connmark_target_info *cm = src;
 	struct xt_connmark_target_info m = {
 		.mark	= cm->mark,
 		.mask	= cm->mask,
@@ -132,7 +132,7 @@ static void compat_from_user(void *dst, void *src)
 
 static int compat_to_user(void __user *dst, void *src)
 {
-	struct xt_connmark_target_info *m = src;
+	const struct xt_connmark_target_info *m = src;
 	struct compat_xt_connmark_target_info cm = {
 		.mark	= m->mark,
 		.mask	= m->mask,
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index ab2f0d0..8d5e154 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -33,7 +33,7 @@ MODULE_ALIAS("ip6t_CONNSECMARK");
  * If the packet has a security mark and the connection does not, copy
  * the security mark from the packet to the connection.
  */
-static void secmark_save(struct sk_buff *skb)
+static void secmark_save(const struct sk_buff *skb)
 {
 	if (skb->secmark) {
 		struct nf_conn *ct;
@@ -89,7 +89,7 @@ static bool checkentry(const char *tablename, const void *entry,
 		       const struct xt_target *target, void *targinfo,
 		       unsigned int hook_mask)
 {
-	struct xt_connsecmark_target_info *info = targinfo;
+	const struct xt_connsecmark_target_info *info = targinfo;
 
 	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index bd9cdf2..6b7369f 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -72,7 +72,7 @@ checkentry_v0(const char *tablename,
 	      void *targinfo,
 	      unsigned int hook_mask)
 {
-	struct xt_mark_target_info *markinfo = targinfo;
+	const struct xt_mark_target_info *markinfo = targinfo;
 
 	if (markinfo->mark > 0xffffffff) {
 		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
@@ -88,7 +88,7 @@ checkentry_v1(const char *tablename,
 	      void *targinfo,
 	      unsigned int hook_mask)
 {
-	struct xt_mark_target_info_v1 *markinfo = targinfo;
+	const struct xt_mark_target_info_v1 *markinfo = targinfo;
 
 	if (markinfo->mode != XT_MARK_SET
 	    && markinfo->mode != XT_MARK_AND
@@ -114,7 +114,7 @@ struct compat_xt_mark_target_info_v1 {
 
 static void compat_from_user_v1(void *dst, void *src)
 {
-	struct compat_xt_mark_target_info_v1 *cm = src;
+	const struct compat_xt_mark_target_info_v1 *cm = src;
 	struct xt_mark_target_info_v1 m = {
 		.mark	= cm->mark,
 		.mode	= cm->mode,
@@ -124,7 +124,7 @@ static void compat_from_user_v1(void *dst, void *src)
 
 static int compat_to_user_v1(void __user *dst, void *src)
 {
-	struct xt_mark_target_info_v1 *m = src;
+	const struct xt_mark_target_info_v1 *m = src;
 	struct compat_xt_mark_target_info_v1 cm = {
 		.mark	= m->mark,
 		.mode	= m->mode,
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 0c6f283..20e55d5 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -43,7 +43,7 @@ nflog_checkentry(const char *tablename, const void *entry,
 		 const struct xt_target *target, void *targetinfo,
 		 unsigned int hookmask)
 {
-	struct xt_nflog_info *info = targetinfo;
+	const struct xt_nflog_info *info = targetinfo;
 
 	if (info->flags & ~XT_NFLOG_MASK)
 		return false;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 1254178..99c246e 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -26,7 +26,7 @@ match(const struct sk_buff *skb,
       bool *hotdrop)
 {
 	const struct xt_connbytes_info *sinfo = matchinfo;
-	struct nf_conn *ct;
+	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	u_int64_t what = 0;	/* initialize to make gcc happy */
 	u_int64_t bytes = 0;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 94d5251..71f3c1a 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -41,7 +41,7 @@ match(const struct sk_buff *skb,
       bool *hotdrop)
 {
 	const struct xt_connmark_info *info = matchinfo;
-	struct nf_conn *ct;
+	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 
 	ct = nf_ct_get(skb, &ctinfo);
@@ -58,7 +58,7 @@ checkentry(const char *tablename,
 	   void *matchinfo,
 	   unsigned int hook_mask)
 {
-	struct xt_connmark_info *cm = matchinfo;
+	const struct xt_connmark_info *cm = matchinfo;
 
 	if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
 		printk(KERN_WARNING "connmark: only support 32bit mark\n");
@@ -88,7 +88,7 @@ struct compat_xt_connmark_info {
 
 static void compat_from_user(void *dst, void *src)
 {
-	struct compat_xt_connmark_info *cm = src;
+	const struct compat_xt_connmark_info *cm = src;
 	struct xt_connmark_info m = {
 		.mark	= cm->mark,
 		.mask	= cm->mask,
@@ -99,7 +99,7 @@ static void compat_from_user(void *dst, void *src)
 
 static int compat_to_user(void __user *dst, void *src)
 {
-	struct xt_connmark_info *m = src;
+	const struct xt_connmark_info *m = src;
 	struct compat_xt_connmark_info cm = {
 		.mark	= m->mark,
 		.mask	= m->mask,
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 87364f5..9e3ec31 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -30,11 +30,11 @@ match(const struct sk_buff *skb,
       bool *hotdrop)
 {
 	const struct xt_conntrack_info *sinfo = matchinfo;
-	struct nf_conn *ct;
+	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
 
-	ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
 
@@ -150,7 +150,7 @@ struct compat_xt_conntrack_info
 
 static void compat_from_user(void *dst, void *src)
 {
-	struct compat_xt_conntrack_info *cm = src;
+	const struct compat_xt_conntrack_info *cm = src;
 	struct xt_conntrack_info m = {
 		.statemask	= cm->statemask,
 		.statusmask	= cm->statusmask,
@@ -167,7 +167,7 @@ static void compat_from_user(void *dst, void *src)
 
 static int compat_to_user(void __user *dst, void *src)
 {
-	struct xt_conntrack_info *m = src;
+	const struct xt_conntrack_info *m = src;
 	struct compat_xt_conntrack_info cm = {
 		.statemask	= m->statemask,
 		.statusmask	= m->statusmask,
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 2489590..1b77c5b 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -39,7 +39,7 @@ dccp_find_option(u_int8_t option,
 		 bool *hotdrop)
 {
 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
-	unsigned char *op;
+	const unsigned char *op;
 	unsigned int optoff = __dccp_hdr_len(dh);
 	unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh);
 	unsigned int i;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index a1b5996..deb5890 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -95,7 +95,7 @@ static HLIST_HEAD(hashlimit_htables);
 static struct kmem_cache *hashlimit_cachep __read_mostly;
 
 static inline bool dst_cmp(const struct dsthash_ent *ent,
-			   struct dsthash_dst *b)
+			   const struct dsthash_dst *b)
 {
 	return !memcmp(&ent->dst, b, sizeof(ent->dst));
 }
@@ -107,7 +107,8 @@ hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
 }
 
 static struct dsthash_ent *
-dsthash_find(const struct xt_hashlimit_htable *ht, struct dsthash_dst *dst)
+dsthash_find(const struct xt_hashlimit_htable *ht,
+	     const struct dsthash_dst *dst)
 {
 	struct dsthash_ent *ent;
 	struct hlist_node *pos;
@@ -123,7 +124,8 @@ dsthash_find(const struct xt_hashlimit_htable *ht, struct dsthash_dst *dst)
 
 /* allocate dsthash_ent, initialize dst, put in htable and lock it */
 static struct dsthash_ent *
-dsthash_alloc_init(struct xt_hashlimit_htable *ht, struct dsthash_dst *dst)
+dsthash_alloc_init(struct xt_hashlimit_htable *ht,
+		   const struct dsthash_dst *dst)
 {
 	struct dsthash_ent *ent;
 
@@ -228,19 +230,21 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family)
 	return 0;
 }
 
-static bool select_all(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)
+static bool select_all(const struct xt_hashlimit_htable *ht,
+		       const struct dsthash_ent *he)
 {
 	return 1;
 }
 
-static bool select_gc(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)
+static bool select_gc(const struct xt_hashlimit_htable *ht,
+		      const struct dsthash_ent *he)
 {
 	return (jiffies >= he->expires);
 }
 
 static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
-				bool (*select)(struct xt_hashlimit_htable *ht,
-					      struct dsthash_ent *he))
+			bool (*select)(const struct xt_hashlimit_htable *ht,
+				      const struct dsthash_ent *he))
 {
 	unsigned int i;
 
@@ -283,7 +287,8 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo)
 	vfree(hinfo);
 }
 
-static struct xt_hashlimit_htable *htable_find_get(char *name, int family)
+static struct xt_hashlimit_htable *htable_find_get(const char *name,
+						   int family)
 {
 	struct xt_hashlimit_htable *hinfo;
 	struct hlist_node *pos;
@@ -368,7 +373,8 @@ static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
 }
 
 static int
-hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst,
+hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
+		   struct dsthash_dst *dst,
 		   const struct sk_buff *skb, unsigned int protoff)
 {
 	__be16 _ports[2], *ports;
@@ -443,8 +449,8 @@ hashlimit_match(const struct sk_buff *skb,
 		unsigned int protoff,
 		bool *hotdrop)
 {
-	struct xt_hashlimit_info *r =
-		((struct xt_hashlimit_info *)matchinfo)->u.master;
+	const struct xt_hashlimit_info *r =
+		((const struct xt_hashlimit_info *)matchinfo)->u.master;
 	struct xt_hashlimit_htable *hinfo = r->hinfo;
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
@@ -543,7 +549,7 @@ hashlimit_checkentry(const char *tablename,
 static void
 hashlimit_destroy(const struct xt_match *match, void *matchinfo)
 {
-	struct xt_hashlimit_info *r = matchinfo;
+	const struct xt_hashlimit_info *r = matchinfo;
 
 	htable_put(r->hinfo);
 }
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index a2688b8..047d004 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -39,12 +39,12 @@ match(const struct sk_buff *skb,
       bool *hotdrop)
 {
 	const struct xt_helper_info *info = matchinfo;
-	struct nf_conn *ct;
-	struct nf_conn_help *master_help;
+	const struct nf_conn *ct;
+	const struct nf_conn_help *master_help;
 	enum ip_conntrack_info ctinfo;
 	bool ret = info->invert;
 
-	ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct) {
 		DEBUGP("xt_helper: Eek! invalid conntrack?\n");
 		return ret;
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 2717aa6..b042419 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -67,7 +67,8 @@ ipt_limit_match(const struct sk_buff *skb,
 		unsigned int protoff,
 		bool *hotdrop)
 {
-	struct xt_rateinfo *r = ((struct xt_rateinfo *)matchinfo)->master;
+	struct xt_rateinfo *r =
+		((const struct xt_rateinfo *)matchinfo)->master;
 	unsigned long now = jiffies;
 
 	spin_lock_bh(&limit_lock);
@@ -144,7 +145,7 @@ struct compat_xt_rateinfo {
  * master pointer, which does not need to be preserved. */
 static void compat_from_user(void *dst, void *src)
 {
-	struct compat_xt_rateinfo *cm = src;
+	const struct compat_xt_rateinfo *cm = src;
 	struct xt_rateinfo m = {
 		.avg		= cm->avg,
 		.burst		= cm->burst,
@@ -158,7 +159,7 @@ static void compat_from_user(void *dst, void *src)
 
 static int compat_to_user(void __user *dst, void *src)
 {
-	struct xt_rateinfo *m = src;
+	const struct xt_rateinfo *m = src;
 	struct compat_xt_rateinfo cm = {
 		.avg		= m->avg,
 		.burst		= m->burst,
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 83ed8067..b8ab794 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -60,7 +60,7 @@ struct compat_xt_mark_info {
 
 static void compat_from_user(void *dst, void *src)
 {
-	struct compat_xt_mark_info *cm = src;
+	const struct compat_xt_mark_info *cm = src;
 	struct xt_mark_info m = {
 		.mark	= cm->mark,
 		.mask	= cm->mask,
@@ -71,7 +71,7 @@ static void compat_from_user(void *dst, void *src)
 
 static int compat_to_user(void __user *dst, void *src)
 {
-	struct xt_mark_info *m = src;
+	const struct xt_mark_info *m = src;
 	struct compat_xt_mark_info cm = {
 		.mark	= m->mark,
 		.mask	= m->mask,
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 34f0d3e..467b2dc 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -36,7 +36,7 @@ match(const struct sk_buff *skb,
 	const struct xt_physdev_info *info = matchinfo;
 	bool ret;
 	const char *indev, *outdev;
-	struct nf_bridge_info *nf_bridge;
+	const struct nf_bridge_info *nf_bridge;
 
 	/* Not a bridged IP packet or no info available yet:
 	 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 1534de5..5ab6d71 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -34,7 +34,7 @@ xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m,
 }
 
 static inline bool
-match_xfrm_state(struct xfrm_state *x, const struct xt_policy_elem *e,
+match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e,
 		 unsigned short family)
 {
 #define MATCH_ADDR(x,y,z)	(!e->match.x ||			       \
@@ -55,7 +55,7 @@ match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info,
 		unsigned short family)
 {
 	const struct xt_policy_elem *e;
-	struct sec_path *sp = skb->sp;
+	const struct sec_path *sp = skb->sp;
 	int strict = info->flags & XT_POLICY_MATCH_STRICT;
 	int i, pos;
 
@@ -85,7 +85,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 		 unsigned short family)
 {
 	const struct xt_policy_elem *e;
-	struct dst_entry *dst = skb->dst;
+	const struct dst_entry *dst = skb->dst;
 	int strict = info->flags & XT_POLICY_MATCH_STRICT;
 	int i, pos;
 
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index e13d62a..feb130d 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -22,7 +22,8 @@ match(const struct sk_buff *skb,
       const struct xt_match *match, const void *matchinfo,
       int offset, unsigned int protoff, bool *hotdrop)
 {
-	struct xt_quota_info *q = ((struct xt_quota_info *)matchinfo)->master;
+	struct xt_quota_info *q =
+		((const struct xt_quota_info *)matchinfo)->master;
 	bool ret = q->flags & XT_QUOTA_INVERT;
 
 	spin_lock_bh(&quota_lock);
@@ -43,7 +44,7 @@ checkentry(const char *tablename, const void *entry,
 	   const struct xt_match *match, void *matchinfo,
 	   unsigned int hook_mask)
 {
-	struct xt_quota_info *q = (struct xt_quota_info *)matchinfo;
+	struct xt_quota_info *q = matchinfo;
 
 	if (q->flags & ~XT_QUOTA_MASK)
 		return false;
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index ad82c13..44b807d 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -32,7 +32,7 @@ match(const struct sk_buff *skb,
       bool *hotdrop)
 {
 	const struct xt_realm_info *info = matchinfo;
-	struct dst_entry *dst = skb->dst;
+	const struct dst_entry *dst = skb->dst;
 
 	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
 }
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 0af4289..3da4978 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -57,7 +57,7 @@ checkentry(const char *tablename, const void *entry,
 	   const struct xt_match *match, void *matchinfo,
 	   unsigned int hook_mask)
 {
-	struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
+	struct xt_statistic_info *info = matchinfo;
 
 	if (info->mode > XT_STATISTIC_MODE_MAX ||
 	    info->flags & ~XT_STATISTIC_MASK)
-- 
cgit v0.10.2


From 170b197c0afc621179f0f82284e331e3c252b7cf Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:17:36 -0700
Subject: [NETFILTER]: Remove incorrect inline markers

device_cmp: the function's address is taken (call to nf_ct_iterate_cleanup)
alloc_null_binding: referenced externally

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 846a0e7..f136ef7 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -109,7 +109,7 @@ masquerade_target(struct sk_buff **pskb,
 	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static inline int
+static int
 device_cmp(struct nf_conn *i, void *ifindex)
 {
 	const struct nf_conn_nat *nat = nfct_nat(i);
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index fc3d943..ea1a07c 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -172,7 +172,7 @@ static bool ipt_dnat_checkentry(const char *tablename,
 	return true;
 }
 
-inline unsigned int
+unsigned int
 alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
-- 
cgit v0.10.2


From 7c4e36bc172ae1accde835b880fdc4a2c2a3df57 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:19:08 -0700
Subject: [NETFILTER]: Remove redundant parentheses/braces

Removes redundant parentheses and braces (And add one pair in a
xt_tcpudp.c macro).

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2de7ae0..5de13b4 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -122,9 +122,8 @@ __clusterip_config_find(__be32 clusterip)
 	list_for_each(pos, &clusterip_configs) {
 		struct clusterip_config *c = list_entry(pos,
 					struct clusterip_config, list);
-		if (c->clusterip == clusterip) {
+		if (c->clusterip == clusterip)
 			return c;
-		}
 	}
 
 	return NULL;
@@ -155,9 +154,8 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
 {
 	int n;
 
-	for (n = 0; n < i->num_local_nodes; n++) {
+	for (n = 0; n < i->num_local_nodes; n++)
 		set_bit(i->local_nodes[n] - 1, &c->local_nodes);
-	}
 }
 
 static struct clusterip_config *
@@ -255,10 +253,9 @@ clusterip_hashfn(const struct sk_buff *skb,
 		dport = ports[1];
 		break;
 	default:
-		if (net_ratelimit()) {
+		if (net_ratelimit())
 			printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n",
 				iph->protocol);
-		}
 		sport = dport = 0;
 	}
 
@@ -286,7 +283,7 @@ clusterip_hashfn(const struct sk_buff *skb,
 	}
 
 	/* node numbers are 1..n, not 0..n */
-	return ((hashval % config->num_total_nodes)+1);
+	return (hashval % config->num_total_nodes) + 1;
 }
 
 static inline int
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 0236701..a647c1d 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -58,8 +58,8 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 
 	if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) ||
 	     tcph->ece == einfo->proto.tcp.ece) &&
-	    ((!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
-	     tcph->cwr == einfo->proto.tcp.cwr)))
+	    (!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
+	     tcph->cwr == einfo->proto.tcp.cwr))
 		return true;
 
 	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index f53f2c4..737830b 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -75,7 +75,7 @@ static bool ipt_ttl_checkentry(const char *tablename,
 			info->mode);
 		return false;
 	}
-	if ((info->mode != IPT_TTL_SET) && (info->ttl == 0))
+	if (info->mode != IPT_TTL_SET && info->ttl == 0)
 		return false;
 	return true;
 }
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 282eb00..5b25ca6 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -179,12 +179,10 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	unsigned int groupnum = ffs(loginfo->nl_group) - 1;
 
 	/* calculate the size of the skb needed */
-	if ((loginfo->copy_range == 0) ||
-	    (loginfo->copy_range > skb->len)) {
+	if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len)
 		copy_len = skb->len;
-	} else {
+	else
 		copy_len = loginfo->copy_range;
-	}
 
 	size = NLMSG_SPACE(sizeof(*pm) + copy_len);
 
@@ -257,9 +255,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
 		BUG();
 
 	/* check if we are building multi-part messages */
-	if (ub->qlen > 1) {
+	if (ub->qlen > 1)
 		ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
-	}
 
 	ub->lastnlh = nlh;
 
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index b266d98..854281c 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -35,8 +35,8 @@ match(const struct sk_buff *skb,
 	const struct iphdr *iph = ip_hdr(skb);
 
 	if (info->flags & IPRANGE_SRC) {
-		if (((ntohl(iph->saddr) < ntohl(info->src.min_ip))
-			  || (ntohl(iph->saddr) > ntohl(info->src.max_ip)))
+		if ((ntohl(iph->saddr) < ntohl(info->src.min_ip)
+			  || ntohl(iph->saddr) > ntohl(info->src.max_ip))
 			 ^ !!(info->flags & IPRANGE_SRC_INV)) {
 			DEBUGP("src IP %u.%u.%u.%u NOT in range %s"
 			       "%u.%u.%u.%u-%u.%u.%u.%u\n",
@@ -48,8 +48,8 @@ match(const struct sk_buff *skb,
 		}
 	}
 	if (info->flags & IPRANGE_DST) {
-		if (((ntohl(iph->daddr) < ntohl(info->dst.min_ip))
-			  || (ntohl(iph->daddr) > ntohl(info->dst.max_ip)))
+		if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip)
+			  || ntohl(iph->daddr) > ntohl(info->dst.max_ip))
 			 ^ !!(info->flags & IPRANGE_DST_INV)) {
 			DEBUGP("dst IP %u.%u.%u.%u NOT in range %s"
 			       "%u.%u.%u.%u-%u.%u.%u.%u\n",
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index d03e6a6..68f7181 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -163,10 +163,9 @@ static void recent_table_flush(struct recent_table *t)
 	struct recent_entry *e, *next;
 	unsigned int i;
 
-	for (i = 0; i < ip_list_hash_size; i++) {
+	for (i = 0; i < ip_list_hash_size; i++)
 		list_for_each_entry_safe(e, next, &t->iphash[i], list)
 			recent_entry_remove(t, e);
-	}
 }
 
 static bool
@@ -329,12 +328,10 @@ static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
 
 	spin_lock_bh(&recent_lock);
 
-	for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) {
-		list_for_each_entry(e, &t->iphash[st->bucket], list) {
+	for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++)
+		list_for_each_entry(e, &t->iphash[st->bucket], list)
 			if (p-- == 0)
 				return e;
-		}
-	}
 	return NULL;
 }
 
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 82fe4ea..59a644d 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -28,17 +28,13 @@ static bool match(const struct sk_buff *skb,
 
 	switch (info->mode) {
 		case IPT_TTL_EQ:
-			return (ttl == info->ttl);
-			break;
+			return ttl == info->ttl;
 		case IPT_TTL_NE:
-			return (!(ttl == info->ttl));
-			break;
+			return ttl != info->ttl;
 		case IPT_TTL_LT:
-			return (ttl < info->ttl);
-			break;
+			return ttl < info->ttl;
 		case IPT_TTL_GT:
-			return (ttl > info->ttl);
-			break;
+			return ttl > info->ttl;
 		default:
 			printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
 				info->mode);
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 20047ff..33c4cb8 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -71,7 +71,7 @@ static bool ip6t_hl_checkentry(const char *tablename,
 			info->mode);
 		return false;
 	}
-	if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) {
+	if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
 		printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't "
 			"make sense with value 0\n");
 		return false;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 4df07f0..0fa1f2c 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -69,7 +69,7 @@ static void send_reset(struct sk_buff *oldskb)
 	otcplen = oldskb->len - tcphoff;
 
 	/* IP header checks: fragment, too short. */
-	if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) {
+	if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
 		DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n",
 			proto, otcplen);
 		return;
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index b4b1d28..fbf3d77 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -78,9 +78,9 @@ match(const struct sk_buff *skb,
 	DEBUGP("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi));
 
 	DEBUGP("IPv6 AH spi %02X ",
-	       (spi_match(ahinfo->spis[0], ahinfo->spis[1],
-			  ntohl(ah->spi),
-			  !!(ahinfo->invflags & IP6T_AH_INV_SPI))));
+	       spi_match(ahinfo->spis[0], ahinfo->spis[1],
+			 ntohl(ah->spi),
+			 !!(ahinfo->invflags & IP6T_AH_INV_SPI)));
 	DEBUGP("len %02X %04X %02X ",
 	       ahinfo->hdrlen, hdrlen,
 	       (!ahinfo->hdrlen ||
@@ -92,9 +92,9 @@ match(const struct sk_buff *skb,
 
 	return (ah != NULL)
 	       &&
-	       (spi_match(ahinfo->spis[0], ahinfo->spis[1],
-			  ntohl(ah->spi),
-			  !!(ahinfo->invflags & IP6T_AH_INV_SPI)))
+	       spi_match(ahinfo->spis[0], ahinfo->spis[1],
+			 ntohl(ah->spi),
+			 !!(ahinfo->invflags & IP6T_AH_INV_SPI))
 	       &&
 	       (!ahinfo->hdrlen ||
 		(ahinfo->hdrlen == hdrlen) ^
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index bebb12a..2af99fc 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -33,7 +33,7 @@ match(const struct sk_buff *skb,
 	int i = 0;
 
 	if (!(skb_mac_header(skb) >= skb->head &&
-	      (skb_mac_header(skb) + ETH_HLEN) <= skb->data) &&
+	      skb_mac_header(skb) + ETH_HLEN <= skb->data) &&
 	    offset != 0) {
 		*hotdrop = true;
 		return false;
@@ -50,8 +50,8 @@ match(const struct sk_buff *skb,
 			eui64[0] |= 0x02;
 
 			i = 0;
-			while ((ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i])
-			       && (i < 8))
+			while (ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i]
+			       && i < 8)
 				i++;
 
 			if (i == 8)
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index e0e416b..65482af 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -77,35 +77,35 @@ match(const struct sk_buff *skb,
 	       ntohl(fh->identification));
 
 	DEBUGP("IPv6 FRAG id %02X ",
-	       (id_match(fraginfo->ids[0], fraginfo->ids[1],
+	       id_match(fraginfo->ids[0], fraginfo->ids[1],
 			 ntohl(fh->identification),
-			 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))));
+			 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)));
 	DEBUGP("res %02X %02X%04X %02X ",
-	       (fraginfo->flags & IP6T_FRAG_RES), fh->reserved,
+	       fraginfo->flags & IP6T_FRAG_RES, fh->reserved,
 	       ntohs(fh->frag_off) & 0x6,
 	       !((fraginfo->flags & IP6T_FRAG_RES)
 		 && (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
 	DEBUGP("first %02X %02X %02X ",
-	       (fraginfo->flags & IP6T_FRAG_FST),
+	       fraginfo->flags & IP6T_FRAG_FST,
 	       ntohs(fh->frag_off) & ~0x7,
 	       !((fraginfo->flags & IP6T_FRAG_FST)
 		 && (ntohs(fh->frag_off) & ~0x7)));
 	DEBUGP("mf %02X %02X %02X ",
-	       (fraginfo->flags & IP6T_FRAG_MF),
+	       fraginfo->flags & IP6T_FRAG_MF,
 	       ntohs(fh->frag_off) & IP6_MF,
 	       !((fraginfo->flags & IP6T_FRAG_MF)
 		 && !((ntohs(fh->frag_off) & IP6_MF))));
 	DEBUGP("last %02X %02X %02X\n",
-	       (fraginfo->flags & IP6T_FRAG_NMF),
+	       fraginfo->flags & IP6T_FRAG_NMF,
 	       ntohs(fh->frag_off) & IP6_MF,
 	       !((fraginfo->flags & IP6T_FRAG_NMF)
 		 && (ntohs(fh->frag_off) & IP6_MF)));
 
 	return (fh != NULL)
 	       &&
-	       (id_match(fraginfo->ids[0], fraginfo->ids[1],
-			 ntohl(fh->identification),
-			 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))
+	       id_match(fraginfo->ids[0], fraginfo->ids[1],
+			ntohl(fh->identification),
+			!!(fraginfo->invflags & IP6T_FRAG_INV_IDS))
 	       &&
 	       !((fraginfo->flags & IP6T_FRAG_RES)
 		 && (fh->reserved || (ntohs(fh->frag_off) & 0x6)))
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index bbd2615..8eecac1 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -160,7 +160,7 @@ match(const struct sk_buff *skb,
 			DEBUGP("len%04X \n", optlen);
 
 			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
-			    (temp < optinfo->optsnr - 1)) {
+			    temp < optinfo->optsnr - 1) {
 				DEBUGP("new pointer is too large! \n");
 				break;
 			}
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index b933e84..ddee088 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -29,16 +29,16 @@ static bool match(const struct sk_buff *skb,
 
 	switch (info->mode) {
 		case IP6T_HL_EQ:
-			return (ip6h->hop_limit == info->hop_limit);
+			return ip6h->hop_limit == info->hop_limit;
 			break;
 		case IP6T_HL_NE:
-			return (!(ip6h->hop_limit == info->hop_limit));
+			return ip6h->hop_limit != info->hop_limit;
 			break;
 		case IP6T_HL_LT:
-			return (ip6h->hop_limit < info->hop_limit);
+			return ip6h->hop_limit < info->hop_limit;
 			break;
 		case IP6T_HL_GT:
-			return (ip6h->hop_limit > info->hop_limit);
+			return ip6h->hop_limit > info->hop_limit;
 			break;
 		default:
 			printk(KERN_WARNING "ip6t_hl: unknown mode %d\n",
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 5ba6ef0..ca020ce 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -74,9 +74,9 @@ ipv6header_match(const struct sk_buff *skb,
 		BUG_ON(hp == NULL);
 
 		/* Calculate the header length */
-		if (nexthdr == NEXTHDR_FRAGMENT) {
+		if (nexthdr == NEXTHDR_FRAGMENT)
 			hdrlen = 8;
-		} else if (nexthdr == NEXTHDR_AUTH)
+		else if (nexthdr == NEXTHDR_AUTH)
 			hdrlen = (hp->hdrlen + 2) << 2;
 		else
 			hdrlen = ipv6_optlen(hp);
@@ -110,7 +110,7 @@ ipv6header_match(const struct sk_buff *skb,
 			break;
 	}
 
-	if ((nexthdr != NEXTHDR_NONE) && (nexthdr != NEXTHDR_ESP))
+	if (nexthdr != NEXTHDR_NONE && nexthdr != NEXTHDR_ESP)
 		temp |= MASK_PROTO;
 
 	if (info->modeflag)
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 8cb6c94..d2bf320 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -38,17 +38,15 @@ match(const struct sk_buff *skb,
 	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
 		return false;
 
-	if (info->match & IP6T_OWNER_UID) {
+	if (info->match & IP6T_OWNER_UID)
 		if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
 		    !!(info->invert & IP6T_OWNER_UID))
 			return false;
-	}
 
-	if (info->match & IP6T_OWNER_GID) {
+	if (info->match & IP6T_OWNER_GID)
 		if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
 		    !!(info->invert & IP6T_OWNER_GID))
 			return false;
-	}
 
 	return true;
 }
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index bc5ff4b..f86fdcd 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -87,9 +87,9 @@ match(const struct sk_buff *skb,
 	DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
 
 	DEBUGP("IPv6 RT segsleft %02X ",
-	       (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
-			       rh->segments_left,
-			       !!(rtinfo->invflags & IP6T_RT_INV_SGS))));
+	       segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+			      rh->segments_left,
+			      !!(rtinfo->invflags & IP6T_RT_INV_SGS)));
 	DEBUGP("type %02X %02X %02X ",
 	       rtinfo->rt_type, rh->type,
 	       (!(rtinfo->flags & IP6T_RT_TYP) ||
@@ -97,11 +97,11 @@ match(const struct sk_buff *skb,
 		 !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
 	DEBUGP("len %02X %04X %02X ",
 	       rtinfo->hdrlen, hdrlen,
-	       (!(rtinfo->flags & IP6T_RT_LEN) ||
+	       !(rtinfo->flags & IP6T_RT_LEN) ||
 		((rtinfo->hdrlen == hdrlen) ^
-		 !!(rtinfo->invflags & IP6T_RT_INV_LEN))));
+		 !!(rtinfo->invflags & IP6T_RT_INV_LEN)));
 	DEBUGP("res %02X %02X %02X ",
-	       (rtinfo->flags & IP6T_RT_RES),
+	       rtinfo->flags & IP6T_RT_RES,
 	       ((const struct rt0_hdr *)rh)->reserved,
 	       !((rtinfo->flags & IP6T_RT_RES) &&
 		 (((const struct rt0_hdr *)rh)->reserved)));
@@ -188,8 +188,8 @@ match(const struct sk_buff *skb,
 					break;
 			}
 			DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr);
-			if ((temp == rtinfo->addrnr) &&
-			    (temp == (unsigned int)((hdrlen - 8) / 16)))
+			if (temp == rtinfo->addrnr &&
+			    temp == (unsigned int)((hdrlen - 8) / 16))
 				return ret;
 			else
 				return false;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 2d779f6..ed6b524 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -74,7 +74,7 @@ static bool checkentry(const char *tablename,
 {
 	const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
 
-	if ((dscp > XT_DSCP_MAX)) {
+	if (dscp > XT_DSCP_MAX) {
 		printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
 		return false;
 	}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 075051a..6ae6df9 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -93,7 +93,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 				return 0;
 
 			opt[i+2] = (newmss & 0xff00) >> 8;
-			opt[i+3] = (newmss & 0x00ff);
+			opt[i+3] = newmss & 0x00ff;
 
 			nf_proto_csum_replace2(&tcph->check, *pskb,
 					       htons(oldmss), htons(newmss), 0);
@@ -126,7 +126,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 	opt[0] = TCPOPT_MSS;
 	opt[1] = TCPOLEN_MSS;
 	opt[2] = (newmss & 0xff00) >> 8;
-	opt[3] = (newmss & 0x00ff);
+	opt[3] = newmss & 0x00ff;
 
 	nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0);
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 99c246e..d9b2e75 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -90,9 +90,9 @@ match(const struct sk_buff *skb,
 	}
 
 	if (sinfo->count.to)
-		return (what <= sinfo->count.to && what >= sinfo->count.from);
+		return what <= sinfo->count.to && what >= sinfo->count.from;
 	else
-		return (what >= sinfo->count.from);
+		return what >= sinfo->count.from;
 }
 
 static bool check(const char *tablename,
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 71f3c1a..3a6e16d 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -48,7 +48,7 @@ match(const struct sk_buff *skb,
 	if (!ct)
 		return false;
 
-	return (((ct->mark) & info->mask) == info->mark) ^ info->invert;
+	return ((ct->mark & info->mask) == info->mark) ^ info->invert;
 }
 
 static bool
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 1b77c5b..f07a68d 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -81,7 +81,7 @@ dccp_find_option(u_int8_t option,
 static inline bool
 match_types(const struct dccp_hdr *dh, u_int16_t typemask)
 {
-	return (typemask & (1 << dh->dccph_type));
+	return typemask & (1 << dh->dccph_type);
 }
 
 static inline bool
@@ -113,11 +113,11 @@ match(const struct sk_buff *skb,
 		return false;
 	}
 
-	return  DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0])
-			&& (ntohs(dh->dccph_sport) <= info->spts[1])),
+	return  DCCHECK(ntohs(dh->dccph_sport) >= info->spts[0]
+			&& ntohs(dh->dccph_sport) <= info->spts[1],
 			XT_DCCP_SRC_PORTS, info->flags, info->invflags)
-		&& DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0])
-			&& (ntohs(dh->dccph_dport) <= info->dpts[1])),
+		&& DCCHECK(ntohs(dh->dccph_dport) >= info->dpts[0]
+			&& ntohs(dh->dccph_dport) <= info->dpts[1],
 			XT_DCCP_DEST_PORTS, info->flags, info->invflags)
 		&& DCCHECK(match_types(dh, info->typemask),
 			   XT_DCCP_TYPE, info->flags, info->invflags)
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index deb5890..094da6e 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -239,7 +239,7 @@ static bool select_all(const struct xt_hashlimit_htable *ht,
 static bool select_gc(const struct xt_hashlimit_htable *ht,
 		      const struct dsthash_ent *he)
 {
-	return (jiffies >= he->expires);
+	return jiffies >= he->expires;
 }
 
 static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 57bcfac..ea4880b 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -47,8 +47,8 @@ match6(const struct sk_buff *skb,
        bool *hotdrop)
 {
 	const struct xt_length_info *info = matchinfo;
-	const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) +
-				  sizeof(struct ipv6hdr));
+	const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
+				 sizeof(struct ipv6hdr);
 
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 8602202..28ec08e 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -37,11 +37,11 @@ match(const struct sk_buff *skb,
     const struct xt_mac_info *info = matchinfo;
 
     /* Is mac pointer valid? */
-    return (skb_mac_header(skb) >= skb->head &&
-	    (skb_mac_header(skb) + ETH_HLEN) <= skb->data
-	    /* If so, compare... */
-	    && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
-		^ info->invert));
+    return skb_mac_header(skb) >= skb->head &&
+	   skb_mac_header(skb) + ETH_HLEN <= skb->data
+	   /* If so, compare... */
+	   && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
+		^ info->invert);
 }
 
 static struct xt_match xt_mac_match[] = {
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 6323972..e4c420b 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -34,9 +34,9 @@ static bool match(const struct sk_buff *skb,
 	const struct xt_pkttype_info *info = matchinfo;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
-		type = (MULTICAST(ip_hdr(skb)->daddr)
+		type = MULTICAST(ip_hdr(skb)->daddr)
 			? PACKET_MULTICAST
-			: PACKET_BROADCAST);
+			: PACKET_BROADCAST;
 	else
 		type = skb->pkt_type;
 
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 22df338..fefc846 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -31,11 +31,9 @@ match_flags(const struct xt_sctp_flag_info *flag_info,
 {
 	int i;
 
-	for (i = 0; i < flag_count; i++) {
-		if (flag_info[i].chunktype == chunktype) {
+	for (i = 0; i < flag_count; i++)
+		if (flag_info[i].chunktype == chunktype)
 			return (chunkflags & flag_info[i].flag_mask) == flag_info[i].flag;
-		}
-	}
 
 	return true;
 }
@@ -56,9 +54,8 @@ match_packet(const struct sk_buff *skb,
 	int i = 0;
 #endif
 
-	if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) {
+	if (chunk_match_type == SCTP_CHUNK_MATCH_ALL)
 		SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap);
-	}
 
 	do {
 		sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
@@ -86,16 +83,14 @@ match_packet(const struct sk_buff *skb,
 
 			case SCTP_CHUNK_MATCH_ALL:
 				if (match_flags(flag_info, flag_count,
-					sch->type, sch->flags)) {
+				    sch->type, sch->flags))
 					SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch->type);
-				}
 				break;
 
 			case SCTP_CHUNK_MATCH_ONLY:
 				if (!match_flags(flag_info, flag_count,
-					sch->type, sch->flags)) {
+				    sch->type, sch->flags))
 					return false;
-				}
 				break;
 			}
 		} else {
@@ -145,11 +140,11 @@ match(const struct sk_buff *skb,
 	}
 	duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
 
-	return  SCCHECK(((ntohs(sh->source) >= info->spts[0])
-			&& (ntohs(sh->source) <= info->spts[1])),
+	return  SCCHECK(ntohs(sh->source) >= info->spts[0]
+			&& ntohs(sh->source) <= info->spts[1],
 			XT_SCTP_SRC_PORTS, info->flags, info->invflags)
-		&& SCCHECK(((ntohs(sh->dest) >= info->dpts[0])
-			&& (ntohs(sh->dest) <= info->dpts[1])),
+		&& SCCHECK(ntohs(sh->dest) >= info->dpts[0]
+			&& ntohs(sh->dest) <= info->dpts[1],
 			XT_SCTP_DEST_PORTS, info->flags, info->invflags)
 		&& SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t),
 					info->chunkmap, info->chunk_match_type,
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 0dd3022..5cb345a 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -95,7 +95,7 @@ tcp_match(const struct sk_buff *skb,
 		return false;
 	}
 
-#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
+#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg)))
 
 	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
-- 
cgit v0.10.2


From f4a607bfae30d15aad46e75d2ed7a39f7ce7708b Mon Sep 17 00:00:00 2001
From: Jerome Borsboom <j.borsboom@erasmusmc.nl>
Date: Sat, 7 Jul 2007 22:19:48 -0700
Subject: [NETFILTER]: nf_nat_sip: only perform RTP DNAT if SIP session was
 SNATed

DNAT of the RTP session is only necessary if the SIP session has
been SNATed.

Signed-off-by: Jerome Borsboom <j.borsboom@erasmusmc.nl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index fac97cf..a32d746 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -260,7 +260,11 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
 	DEBUGP("ip_nat_sdp():\n");
 
 	/* Connection will come from reply */
-	newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+	if (ct->tuplehash[dir].tuple.src.u3.ip ==
+	    ct->tuplehash[!dir].tuple.dst.u3.ip)
+		newip = exp->tuple.dst.u3.ip;
+	else
+		newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
 
 	exp->saved_ip = exp->tuple.dst.u3.ip;
 	exp->tuple.dst.u3.ip = newip;
-- 
cgit v0.10.2


From 1b50b8a371e90a5e110f466e4ac02cf6b5f681de Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:20:36 -0700
Subject: [NETFILTER]: Add u32 match

Along comes... xt_u32, a revamped ipt_u32 from POM-NG,
Plus:

    *	2007-06-02: added ipv6 support

    *	2007-06-05: uses kmalloc for the big buffer

    *   2007-06-05: added inversion

    *   2007-06-20: use skb_copy_bits() and get rid of the big buffer
        and lock (suggested by Pablo Neira Ayuso)

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h
new file mode 100644
index 0000000..9947f56
--- /dev/null
+++ b/include/linux/netfilter/xt_u32.h
@@ -0,0 +1,40 @@
+#ifndef _XT_U32_H
+#define _XT_U32_H 1
+
+enum xt_u32_ops {	/* operator codes stored in xt_u32_location_element.nextop */
+	XT_U32_AND,	/* "&":  val &= number */
+	XT_U32_LEFTSH,	/* "<<": val <<= number */
+	XT_U32_RIGHTSH,	/* ">>": val >>= number */
+	XT_U32_AT,	/* "@" */
+};
+
+struct xt_u32_location_element {
+	u_int32_t number;	/* [0]: packet offset to read; [i>0]: operand for nextop */
+	u_int8_t nextop;	/* enum xt_u32_ops value applied to the running value */
+};
+
+struct xt_u32_value_element {
+	u_int32_t min;	/* presumably lower bound of an accepted range - confirm in xt_u32.c */
+	u_int32_t max;	/* presumably upper bound of an accepted range - confirm in xt_u32.c */
+};
+
+/*
+ * Any way to allow for an arbitrary number of elements?
+ * For now, I settle with a limit of 10 each.
+ */
+#define XT_U32_MAXSIZE 10
+
+struct xt_u32_test {
+	struct xt_u32_location_element location[XT_U32_MAXSIZE+1];
+	struct xt_u32_value_element value[XT_U32_MAXSIZE+1];
+	u_int8_t nnums;		/* number of location[] entries in use */
+	u_int8_t nvalues;	/* presumably number of value[] entries in use - confirm */
+};
+
+struct xt_u32 {
+	struct xt_u32_test tests[XT_U32_MAXSIZE+1];
+	u_int8_t ntests;	/* number of tests[] entries in use (the "&&" operands) */
+	u_int8_t invert;	/* presumably inverts the overall match result - confirm */
+};
+
+#endif /* _XT_U32_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a567dae..aa567fa 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -635,6 +635,19 @@ config NETFILTER_XT_MATCH_TCPMSS
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_U32
+	tristate '"u32" match support'
+	depends on NETFILTER_XTABLES
+	---help---
+	  u32 allows you to extract quantities of up to 4 bytes from a packet,
+	  AND them with specified masks, shift them by specified amounts and
+	  test whether the results are in any of a set of specified ranges.
+	  The specification of what to extract is general enough to skip over
+	  headers with lengths stored in the packet, as in IP or TCP header
+	  lengths.
+
+	  Details and examples are in the kernel module source.
+
 config NETFILTER_XT_MATCH_HASHLIMIT
 	tristate '"hashlimit" match support'
 	depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index b2b5c75..3cf5b9c 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -72,4 +72,5 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
new file mode 100644
index 0000000..0706875
--- /dev/null
+++ b/net/netfilter/xt_u32.c
@@ -0,0 +1,135 @@
+/*
+ *	xt_u32 - kernel module to match u32 packet content
+ *
+ *	Original author: Don Cohen <don@isis.cs3-inc.com>
+ *	© Jan Engelhardt <jengelh@gmx.de>, 2007
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_u32.h>
+
+/*
+ * Evaluate every "&&"-joined test of @data against @skb; true only if all
+ * match. Each test reads big-endian 32-bit words from the packet, combines
+ * them per its operator list, and fails if a read would overrun skb->len.
+ */
+static bool u32_match_it(const struct xt_u32 *data,
+			 const struct sk_buff *skb)
+{
+	const struct xt_u32_test *ct;
+	unsigned int testind, nnums, nvals, i;
+	u_int32_t pos, val, at;
+	int ret;
+
+	/*
+	 * Small example: "0 >> 28 == 4 && 8 & 0xFF0000 >> 16 = 6, 17"
+	 * (=IPv4 and (TCP or UDP)). Outer loop runs over the "&&" operands.
+	 */
+	for (testind = 0; testind < data->ntests; ++testind) {
+		ct  = &data->tests[testind];
+		at  = 0;
+		pos = ct->location[0].number;
+
+		if (skb->len < 4 || pos > skb->len - 4)
+			return false;
+
+		ret   = skb_copy_bits(skb, pos, &val, sizeof(val));
+		BUG_ON(ret < 0);
+		val   = ntohl(val);
+		nnums = ct->nnums;
+
+		/* Inner loop runs over "&", "<<", ">>" and "@" operands */
+		for (i = 1; i < nnums; ++i) {
+			u_int32_t number = ct->location[i].number;
+			switch (ct->location[i].nextop) {
+			case XT_U32_AND:
+				val &= number;
+				break;
+			case XT_U32_LEFTSH:
+				val <<= number;
+				break;
+			case XT_U32_RIGHTSH:
+				val >>= number;
+				break;
+			case XT_U32_AT:
+				if (at + val < at)
+					return false;
+				at += val;
+				pos = number;
+				if (at + 4 < at || skb->len < at + 4 ||
+				    pos > skb->len - at - 4)
+					return false;
+
+				ret = skb_copy_bits(skb, at + pos, &val,
+						    sizeof(val));
+				BUG_ON(ret < 0);
+				val = ntohl(val);
+				break;
+			}
+		}
+
+		/* Run over the "," and ":" operands */
+		nvals = ct->nvalues;
+		for (i = 0; i < nvals; ++i)
+			if (ct->value[i].min <= val && val <= ct->value[i].max)
+				break;
+
+		if (i >= ct->nvalues)
+			return false;
+	}
+
+	return true;
+}
+
+/* xtables match hook: run all u32 tests, then apply the invert flag. */
+static bool u32_match(const struct sk_buff *skb,
+		      const struct net_device *in,
+		      const struct net_device *out,
+		      const struct xt_match *match, const void *matchinfo,
+		      int offset, unsigned int protoff, bool *hotdrop)
+{
+	const struct xt_u32 *info = matchinfo;
+
+	/* invert is XORed into the verdict returned by the tests */
+	return u32_match_it(info, skb) ^ info->invert;
+}
+
+static struct xt_match u32_reg[] = {	/* same match, one entry per family */
+	{
+		.name       = "u32",
+		.family     = AF_INET,
+		.match      = u32_match,
+		.matchsize  = sizeof(struct xt_u32),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "u32",
+		.family     = AF_INET6,
+		.match      = u32_match,
+		.matchsize  = sizeof(struct xt_u32),
+		.me         = THIS_MODULE,
+	},
+};
+
+static int __init xt_u32_init(void)	/* registers every u32_reg entry */
+{
+	return xt_register_matches(u32_reg, ARRAY_SIZE(u32_reg));
+}
+
+static void __exit xt_u32_exit(void)	/* unregisters every u32_reg entry */
+{
+	xt_unregister_matches(u32_reg, ARRAY_SIZE(u32_reg));
+}
+
+module_init(xt_u32_init);
+module_exit(xt_u32_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@gmx.de>");
+MODULE_DESCRIPTION("netfilter u32 match module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_u32");
+MODULE_ALIAS("ip6t_u32");
-- 
cgit v0.10.2


From ba9dda3ab5a865542e69dfe01edb2436857c9420 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Jul 2007 22:21:23 -0700
Subject: [NETFILTER]: x_tables: add TRACE target

The TRACE target can be used to follow IP and IPv6 packets through
the ruleset.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2d6a14f..625d73b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -227,6 +227,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@mark: Generic packet mark
  *	@nfct: Associated connection, if any
  *	@ipvs_property: skbuff is owned by ipvs
+ *	@nf_trace: netfilter packet trace flag
  *	@nfctinfo: Relationship of this skb to the connection
  *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
@@ -278,7 +279,8 @@ struct sk_buff {
 				nfctinfo:3;
 	__u8			pkt_type:3,
 				fclone:2,
-				ipvs_property:1;
+				ipvs_property:1,
+				nf_trace:1;
 	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6a41b96..0583e84 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -428,6 +428,10 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	n->destructor = NULL;
 	C(mark);
 	__nf_copy(n, skb);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+	C(nf_trace);
+#endif
 #ifdef CONFIG_NET_SCHED
 	C(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
@@ -485,6 +489,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->destructor = NULL;
 	new->mark	= old->mark;
 	__nf_copy(new, old);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+	new->nf_trace	= old->nf_trace;
+#endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	new->ipvs_property = old->ipvs_property;
 #endif
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a7dd343..c9e2b5e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -399,6 +399,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->tc_index = from->tc_index;
 #endif
 	nf_copy(to, from);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+	to->nf_trace = from->nf_trace;
+#endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	to->ipvs_property = from->ipvs_property;
 #endif
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7962306..650ab52 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -204,6 +204,112 @@ get_entry(void *base, unsigned int offset)
 	return (struct ipt_entry *)(base + offset);
 }
 
+/* A rule whose ipt_ip is entirely zero is an unconditional rule. */
+static inline int
+unconditional(const struct ipt_ip *ip)
+{
+	const __u32 *word = (const __u32 *)ip;
+	unsigned int n = sizeof(*ip) / sizeof(__u32);
+
+	while (n--)
+		if (*word++)
+			return 0;
+	return 1;
+}
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+static const char *hooknames[] = {	/* indexed by netfilter hook number */
+	[NF_IP_PRE_ROUTING]		= "PREROUTING",
+	[NF_IP_LOCAL_IN]		= "INPUT",
+	[NF_IP_FORWARD]			= "FORWARD",
+	[NF_IP_LOCAL_OUT]		= "OUTPUT",
+	[NF_IP_POST_ROUTING]		= "POSTROUTING",
+};
+
+enum nf_ip_trace_comments {		/* indices into comments[] below */
+	NF_IP_TRACE_COMMENT_RULE,
+	NF_IP_TRACE_COMMENT_RETURN,
+	NF_IP_TRACE_COMMENT_POLICY,
+};
+
+static const char *comments[] = {	/* 3rd field of the TRACE prefix */
+	[NF_IP_TRACE_COMMENT_RULE]	= "rule",
+	[NF_IP_TRACE_COMMENT_RETURN]	= "return",
+	[NF_IP_TRACE_COMMENT_POLICY]	= "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {	/* passed to nf_log_packet() */
+	.type = NF_LOG_TYPE_LOG,
+	.u = {
+		.log = {
+			.level = 4,
+			.logflags = NF_LOG_MASK,
+		},
+	},
+};
+
+/* Entry-iterator callback: tracks chain name and rule number while walking
+ * towards @e; returns 1 once @e has been processed, 0 otherwise. */
+static inline int
+get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
+		      char *hookname, char **chainname,
+		      char **comment, unsigned int *rulenum)
+{
+	struct ipt_standard_target *t = (void *)ipt_get_target(s);
+	const char *tname = t->target.u.kernel.target->name;
+
+	if (strcmp(tname, IPT_ERROR_TARGET) == 0) {
+		/* Head of user chain: ERROR target with chainname */
+		*chainname = t->target.data;
+		*rulenum = 0;
+		return 0;
+	}
+
+	(*rulenum)++;
+	if (s != e)
+		return 0;
+	/* Tail of chains: STANDARD target (return/policy) */
+	if (s->target_offset == sizeof(struct ipt_entry)
+	    && strcmp(tname, IPT_STANDARD_TARGET) == 0
+	    && t->verdict < 0
+	    && unconditional(&s->ip))
+		*comment = (char *)comments[*chainname == hookname
+					    ? NF_IP_TRACE_COMMENT_POLICY
+					    : NF_IP_TRACE_COMMENT_RETURN];
+	return 1;
+}
+
+/*
+ * Emit a "TRACE: table:chain:comment:rulenum" log line for entry @e,
+ * resolving chain name and rule number by walking the table from @hook.
+ */
+static void trace_packet(struct sk_buff *skb,
+			 unsigned int hook,
+			 const struct net_device *in,
+			 const struct net_device *out,
+			 char *tablename,
+			 struct xt_table_info *private,
+			 struct ipt_entry *e)
+{
+	void *table_base = (void *)private->entries[smp_processor_id()];
+	unsigned int start = private->hook_entry[hook];
+	struct ipt_entry *root = get_entry(table_base, start);
+	char *hookname = (char *)hooknames[hook];
+	char *chainname = hookname;
+	char *comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];
+	unsigned int rulenum = 0;
+
+	/* Walk from the hook's first entry; the callback fills in the names */
+	IPT_ENTRY_ITERATE(root, private->size - start, get_chainname_rulenum,
+			  e, hookname, &chainname, &comment, &rulenum);
+
+	nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
+		      "TRACE: %s:%s:%s:%u ",
+		      tablename, chainname, comment, rulenum);
+}
+#endif
+
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
 ipt_do_table(struct sk_buff **pskb,
@@ -261,6 +367,14 @@ ipt_do_table(struct sk_buff **pskb,
 
 			t = ipt_get_target(e);
 			IP_NF_ASSERT(t->u.kernel.target);
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+			/* The packet is traced: log it */
+			if (unlikely((*pskb)->nf_trace))
+				trace_packet(*pskb, hook, in, out,
+					     table->name, private, e);
+#endif
 			/* Standard target? */
 			if (!t->u.kernel.target->target) {
 				int v;
@@ -341,19 +455,6 @@ ipt_do_table(struct sk_buff **pskb,
 #endif
 }
 
-/* All zeroes == unconditional rule. */
-static inline int
-unconditional(const struct ipt_ip *ip)
-{
-	unsigned int i;
-
-	for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
-		if (((__u32 *)ip)[i])
-			return 0;
-
-	return 1;
-}
-
 /* Figures out from what hook each rule can be called: returns 0 if
    there are loops.  Puts hook bitmask in comefrom. */
 static int
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 31dafaf..50d86e9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -521,6 +521,10 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->tc_index = from->tc_index;
 #endif
 	nf_copy(to, from);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+	to->nf_trace = from->nf_trace;
+#endif
 	skb_copy_secmark(to, from);
 }
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7fe4d29..4f93b79 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -241,6 +241,113 @@ get_entry(void *base, unsigned int offset)
 	return (struct ip6t_entry *)(base + offset);
 }
 
+/* A rule whose ip6t_ip6 is entirely zero is an unconditional rule. */
+static inline int
+unconditional(const struct ip6t_ip6 *ipv6)
+{
+	const char *byte = (const char *)ipv6;
+	unsigned int left;
+
+	for (left = sizeof(*ipv6); left; left--)
+		if (*byte++)
+			return 0;
+	return 1;
+}
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+/* This cries for unification! */
+static const char *hooknames[] = {	/* indexed by netfilter hook number */
+	[NF_IP6_PRE_ROUTING]		= "PREROUTING",
+	[NF_IP6_LOCAL_IN]		= "INPUT",
+	[NF_IP6_FORWARD]		= "FORWARD",
+	[NF_IP6_LOCAL_OUT]		= "OUTPUT",
+	[NF_IP6_POST_ROUTING]		= "POSTROUTING",
+};
+
+enum nf_ip_trace_comments {		/* indices into comments[] below */
+	NF_IP6_TRACE_COMMENT_RULE,
+	NF_IP6_TRACE_COMMENT_RETURN,
+	NF_IP6_TRACE_COMMENT_POLICY,
+};
+
+static const char *comments[] = {	/* 3rd field of the TRACE prefix */
+	[NF_IP6_TRACE_COMMENT_RULE]	= "rule",
+	[NF_IP6_TRACE_COMMENT_RETURN]	= "return",
+	[NF_IP6_TRACE_COMMENT_POLICY]	= "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {	/* passed to nf_log_packet() */
+	.type = NF_LOG_TYPE_LOG,
+	.u = {
+		.log = {
+			.level = 4,
+			.logflags = NF_LOG_MASK,
+		},
+	},
+};
+
+/* Entry-iterator callback: tracks chain name and rule number while walking
+ * towards @e; returns 1 once @e has been processed, 0 otherwise. */
+static inline int
+get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
+		      char *hookname, char **chainname,
+		      char **comment, unsigned int *rulenum)
+{
+	struct ip6t_standard_target *t = (void *)ip6t_get_target(s);
+	const char *tname = t->target.u.kernel.target->name;
+
+	if (strcmp(tname, IP6T_ERROR_TARGET) == 0) {
+		/* Head of user chain: ERROR target with chainname */
+		*chainname = t->target.data;
+		*rulenum = 0;
+		return 0;
+	}
+
+	(*rulenum)++;
+	if (s != e)
+		return 0;
+	/* Tail of chains: STANDARD target (return/policy) */
+	if (s->target_offset == sizeof(struct ip6t_entry)
+	    && strcmp(tname, IP6T_STANDARD_TARGET) == 0
+	    && t->verdict < 0
+	    && unconditional(&s->ipv6))
+		*comment = (char *)comments[*chainname == hookname
+					    ? NF_IP6_TRACE_COMMENT_POLICY
+					    : NF_IP6_TRACE_COMMENT_RETURN];
+	return 1;
+}
+
+/*
+ * Emit a "TRACE: table:chain:comment:rulenum" log line for entry @e,
+ * resolving chain name and rule number by walking the table from @hook.
+ */
+static void trace_packet(struct sk_buff *skb,
+			 unsigned int hook,
+			 const struct net_device *in,
+			 const struct net_device *out,
+			 char *tablename,
+			 struct xt_table_info *private,
+			 struct ip6t_entry *e)
+{
+	void *table_base = (void *)private->entries[smp_processor_id()];
+	unsigned int start = private->hook_entry[hook];
+	struct ip6t_entry *root = get_entry(table_base, start);
+	char *hookname = (char *)hooknames[hook];
+	char *chainname = hookname;
+	char *comment = (char *)comments[NF_IP6_TRACE_COMMENT_RULE];
+	unsigned int rulenum = 0;
+
+	/* Walk from the hook's first entry; the callback fills in the names */
+	IP6T_ENTRY_ITERATE(root, private->size - start, get_chainname_rulenum,
+			   e, hookname, &chainname, &comment, &rulenum);
+
+	nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo,
+		      "TRACE: %s:%s:%s:%u ",
+		      tablename, chainname, comment, rulenum);
+}
+#endif
+
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
 ip6t_do_table(struct sk_buff **pskb,
@@ -298,6 +405,14 @@ ip6t_do_table(struct sk_buff **pskb,
 
 			t = ip6t_get_target(e);
 			IP_NF_ASSERT(t->u.kernel.target);
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+			/* The packet is traced: log it */
+			if (unlikely((*pskb)->nf_trace))
+				trace_packet(*pskb, hook, in, out,
+					     table->name, private, e);
+#endif
 			/* Standard target? */
 			if (!t->u.kernel.target->target) {
 				int v;
@@ -377,19 +492,6 @@ ip6t_do_table(struct sk_buff **pskb,
 #endif
 }
 
-/* All zeroes == unconditional rule. */
-static inline int
-unconditional(const struct ip6t_ip6 *ipv6)
-{
-	unsigned int i;
-
-	for (i = 0; i < sizeof(*ipv6); i++)
-		if (((char *)ipv6)[i])
-			break;
-
-	return (i == sizeof(*ipv6));
-}
-
 /* Figures out from what hook each rule can be called: returns 0 if
    there are loops.  Puts hook bitmask in comefrom. */
 static int
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index aa567fa..df5e8da 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -343,6 +343,18 @@ config NETFILTER_XT_TARGET_NOTRACK
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_TARGET_TRACE
+	tristate  '"TRACE" target support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_RAW || IP6_NF_RAW
+	help
+	  The TRACE target allows you to mark packets so that the kernel
+	  will log every rule which matches the packets as they traverse
+	  the tables, chains and rules.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+
 config NETFILTER_XT_TARGET_SECMARK
 	tristate '"SECMARK" target support'
 	depends on NETFILTER_XTABLES && NETWORK_SECMARK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3cf5b9c..3b79268 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
new file mode 100644
index 0000000..b82fc46
--- /dev/null
+++ b/net/netfilter/xt_TRACE.c
@@ -0,0 +1,53 @@
+/* This is a module which is used to mark packets for tracing.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TRACE");
+MODULE_ALIAS("ip6t_TRACE");
+
+/* Set the packet's nf_trace flag and return XT_CONTINUE. */
+static unsigned int target(struct sk_buff **pskb,
+			   const struct net_device *in,
+			   const struct net_device *out,
+			   unsigned int hooknum,
+			   const struct xt_target *target,
+			   const void *targinfo)
+{
+	(*pskb)->nf_trace = 1;
+	return XT_CONTINUE;
+}
+
+static struct xt_target xt_trace_target[] = {	/* one entry per family */
+	{
+		.name		= "TRACE",
+		.family		= AF_INET,
+		.target		= target,
+		.table		= "raw",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "TRACE",
+		.family		= AF_INET6,
+		.target		= target,
+		.table		= "raw",
+		.me		= THIS_MODULE,
+	},
+};
+
+static int __init xt_trace_init(void)	/* registers all TRACE entries */
+{
+	return xt_register_targets(xt_trace_target,
+				   ARRAY_SIZE(xt_trace_target));
+}
+
+static void __exit xt_trace_fini(void)	/* unregisters all TRACE entries */
+{
+	xt_unregister_targets(xt_trace_target, ARRAY_SIZE(xt_trace_target));
+}
+
+module_init(xt_trace_init);
+module_exit(xt_trace_fini);
-- 
cgit v0.10.2


From 9f15c5302de4e8b0aac7ca24c36bf26a7fe1a513 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:22:02 -0700
Subject: [NETFILTER]: x_tables: mark matches and targets __read_mostly

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 1d75a5c..e981232 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1140,13 +1140,13 @@ void arpt_unregister_table(struct arpt_table *table)
 }
 
 /* The built-in targets: standard (NULL) and error. */
-static struct arpt_target arpt_standard_target = {
+static struct arpt_target arpt_standard_target __read_mostly = {
 	.name		= ARPT_STANDARD_TARGET,
 	.targetsize	= sizeof(int),
 	.family		= NF_ARP,
 };
 
-static struct arpt_target arpt_error_target = {
+static struct arpt_target arpt_error_target __read_mostly = {
 	.name		= ARPT_ERROR_TARGET,
 	.target		= arpt_error,
 	.targetsize	= ARPT_FUNCTION_MAXNAMELEN,
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 497a16e..c4bdab4 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -81,7 +81,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target,
 	return true;
 }
 
-static struct arpt_target arpt_mangle_reg = {
+static struct arpt_target arpt_mangle_reg __read_mostly = {
 	.name		= "mangle",
 	.target		= target,
 	.targetsize	= sizeof(struct arpt_mangle),
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 650ab52..2ba5bd9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2264,7 +2264,7 @@ icmp_checkentry(const char *tablename,
 }
 
 /* The built-in targets: standard (NULL) and error. */
-static struct xt_target ipt_standard_target = {
+static struct xt_target ipt_standard_target __read_mostly = {
 	.name		= IPT_STANDARD_TARGET,
 	.targetsize	= sizeof(int),
 	.family		= AF_INET,
@@ -2275,7 +2275,7 @@ static struct xt_target ipt_standard_target = {
 #endif
 };
 
-static struct xt_target ipt_error_target = {
+static struct xt_target ipt_error_target __read_mostly = {
 	.name		= IPT_ERROR_TARGET,
 	.target		= ipt_error,
 	.targetsize	= IPT_FUNCTION_MAXNAMELEN,
@@ -2298,7 +2298,7 @@ static struct nf_sockopt_ops ipt_sockopts = {
 #endif
 };
 
-static struct xt_match icmp_matchstruct = {
+static struct xt_match icmp_matchstruct __read_mostly = {
 	.name		= "icmp",
 	.match		= icmp_match,
 	.matchsize	= sizeof(struct ipt_icmp),
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 5de13b4..1cef3b0 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -466,7 +466,7 @@ static void destroy(const struct xt_target *target, void *targinfo)
 	nf_ct_l3proto_module_put(target->family);
 }
 
-static struct xt_target clusterip_tgt = {
+static struct xt_target clusterip_tgt __read_mostly = {
 	.name		= "CLUSTERIP",
 	.family		= AF_INET,
 	.target		= target,
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index a647c1d..f1253bd 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -128,7 +128,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_ecn_reg = {
+static struct xt_target ipt_ecn_reg __read_mostly = {
 	.name		= "ECN",
 	.family		= AF_INET,
 	.target		= target,
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index bcc43a6..9bfce61 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -463,7 +463,7 @@ static bool ipt_log_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_log_reg = {
+static struct xt_target ipt_log_reg __read_mostly = {
 	.name		= "LOG",
 	.family		= AF_INET,
 	.target		= ipt_log_target,
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index f136ef7..bc033e0 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -169,7 +169,7 @@ static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
 
-static struct xt_target masquerade = {
+static struct xt_target masquerade __read_mostly = {
 	.name		= "MASQUERADE",
 	.family		= AF_INET,
 	.target		= masquerade_target,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index a902c71..0a7ce15 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -85,7 +85,7 @@ target(struct sk_buff **pskb,
 	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static struct xt_target target_module = {
+static struct xt_target target_module __read_mostly = {
 	.name 		= MODULENAME,
 	.family		= AF_INET,
 	.target 	= target,
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 2a04103..61e1e47 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -101,7 +101,7 @@ redirect_target(struct sk_buff **pskb,
 	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static struct xt_target redirect_reg = {
+static struct xt_target redirect_reg __read_mostly = {
 	.name		= "REDIRECT",
 	.family		= AF_INET,
 	.target		= redirect_target,
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 90f7b70..dd5432c 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -240,7 +240,7 @@ static bool check(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_reject_reg = {
+static struct xt_target ipt_reject_reg __read_mostly = {
 	.name		= "REJECT",
 	.family		= AF_INET,
 	.target		= reject,
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 3649fab..3a0d7da 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -161,7 +161,7 @@ same_target(struct sk_buff **pskb,
 	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static struct xt_target same_reg = {
+static struct xt_target same_reg __read_mostly = {
 	.name		= "SAME",
 	.family		= AF_INET,
 	.target		= same_target,
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index ac43e86..25f5d0b 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -63,7 +63,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_tos_reg = {
+static struct xt_target ipt_tos_reg __read_mostly = {
 	.name		= "TOS",
 	.family		= AF_INET,
 	.target		= target,
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 737830b..2b54e7b 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -80,7 +80,7 @@ static bool ipt_ttl_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_TTL = {
+static struct xt_target ipt_TTL __read_mostly = {
 	.name 		= "TTL",
 	.family		= AF_INET,
 	.target 	= ipt_ttl_target,
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 5b25ca6..226750d 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -381,7 +381,7 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_target ipt_ulog_reg = {
+static struct xt_target ipt_ulog_reg __read_mostly = {
 	.name		= "ULOG",
 	.family		= AF_INET,
 	.target		= ipt_ulog_target,
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index abea446..59f01f7 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -44,7 +44,7 @@ static bool match(const struct sk_buff *skb,
 	return ret;
 }
 
-static struct xt_match addrtype_match = {
+static struct xt_match addrtype_match __read_mostly = {
 	.name		= "addrtype",
 	.family		= AF_INET,
 	.match		= match,
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 49d503c..61b017f 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -88,7 +88,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match ah_match = {
+static struct xt_match ah_match __read_mostly = {
 	.name		= "ah",
 	.family		= AF_INET,
 	.match		= match,
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 3129e31..d6925c6 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -111,7 +111,7 @@ static bool checkentry(const char *tablename, const void *ip_void,
 	return true;
 }
 
-static struct xt_match ecn_match = {
+static struct xt_match ecn_match __read_mostly = {
 	.name		= "ecn",
 	.family		= AF_INET,
 	.match		= match,
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index 854281c..6a3a033 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -63,7 +63,7 @@ match(const struct sk_buff *skb,
 	return true;
 }
 
-static struct xt_match iprange_match = {
+static struct xt_match iprange_match __read_mostly = {
 	.name		= "iprange",
 	.family		= AF_INET,
 	.match		= match,
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index deea4b8..b14e77d 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -68,7 +68,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match owner_match = {
+static struct xt_match owner_match __read_mostly = {
 	.name		= "owner",
 	.family		= AF_INET,
 	.match		= match,
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 68f7181..a7b14f2 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -460,7 +460,7 @@ static const struct file_operations recent_fops = {
 };
 #endif /* CONFIG_PROC_FS */
 
-static struct xt_match recent_match = {
+static struct xt_match recent_match __read_mostly = {
 	.name		= "recent",
 	.family		= AF_INET,
 	.match		= ipt_recent_match,
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 67699ae..e740441 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -33,7 +33,7 @@ match(const struct sk_buff *skb,
 	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
 }
 
-static struct xt_match tos_match = {
+static struct xt_match tos_match __read_mostly = {
 	.name		= "tos",
 	.family		= AF_INET,
 	.match		= match,
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 59a644d..a439900 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -44,7 +44,7 @@ static bool match(const struct sk_buff *skb,
 	return false;
 }
 
-static struct xt_match ttl_match = {
+static struct xt_match ttl_match __read_mostly = {
 	.name		= "ttl",
 	.family		= AF_INET,
 	.match		= match,
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index ea1a07c..080393a 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -228,7 +228,7 @@ int nf_nat_rule_find(struct sk_buff **pskb,
 	return ret;
 }
 
-static struct xt_target ipt_snat_reg = {
+static struct xt_target ipt_snat_reg __read_mostly = {
 	.name		= "SNAT",
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
@@ -238,7 +238,7 @@ static struct xt_target ipt_snat_reg = {
 	.family		= AF_INET,
 };
 
-static struct xt_target ipt_dnat_reg = {
+static struct xt_target ipt_dnat_reg __read_mostly = {
 	.name		= "DNAT",
 	.target		= ipt_dnat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4f93b79..254c769 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1441,13 +1441,13 @@ icmp6_checkentry(const char *tablename,
 }
 
 /* The built-in targets: standard (NULL) and error. */
-static struct xt_target ip6t_standard_target = {
+static struct xt_target ip6t_standard_target __read_mostly = {
 	.name		= IP6T_STANDARD_TARGET,
 	.targetsize	= sizeof(int),
 	.family		= AF_INET6,
 };
 
-static struct xt_target ip6t_error_target = {
+static struct xt_target ip6t_error_target __read_mostly = {
 	.name		= IP6T_ERROR_TARGET,
 	.target		= ip6t_error,
 	.targetsize	= IP6T_FUNCTION_MAXNAMELEN,
@@ -1464,7 +1464,7 @@ static struct nf_sockopt_ops ip6t_sockopts = {
 	.get		= do_ip6t_get_ctl,
 };
 
-static struct xt_match icmp6_matchstruct = {
+static struct xt_match icmp6_matchstruct __read_mostly = {
 	.name		= "icmp6",
 	.match		= &icmp6_match,
 	.matchsize	= sizeof(struct ip6t_icmp),
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 33c4cb8..ad4d943 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -79,7 +79,7 @@ static bool ip6t_hl_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ip6t_HL = {
+static struct xt_target ip6t_HL __read_mostly = {
 	.name 		= "HL",
 	.family		= AF_INET6,
 	.target		= ip6t_hl_target,
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 996168d..540bf14 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -477,7 +477,7 @@ static bool ip6t_log_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ip6t_log_reg = {
+static struct xt_target ip6t_log_reg __read_mostly = {
 	.name 		= "LOG",
 	.family		= AF_INET6,
 	.target 	= ip6t_log_target,
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 0fa1f2c..14008dc 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -244,7 +244,7 @@ static bool check(const char *tablename,
 	return true;
 }
 
-static struct xt_target ip6t_reject_reg = {
+static struct xt_target ip6t_reject_reg __read_mostly = {
 	.name		= "REJECT",
 	.family		= AF_INET6,
 	.target		= reject6_target,
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index fbf3d77..a9fe2aa 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -120,7 +120,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match ah_match = {
+static struct xt_match ah_match __read_mostly = {
 	.name		= "ah",
 	.family		= AF_INET6,
 	.match		= match,
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 2af99fc..34ba150 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -62,7 +62,7 @@ match(const struct sk_buff *skb,
 	return false;
 }
 
-static struct xt_match eui64_match = {
+static struct xt_match eui64_match __read_mostly = {
 	.name		= "eui64",
 	.family		= AF_INET6,
 	.match		= match,
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 65482af..bb1cfa8 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -137,7 +137,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match frag_match = {
+static struct xt_match frag_match __read_mostly = {
 	.name		= "frag",
 	.family		= AF_INET6,
 	.match		= match,
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 8eecac1..6247d4c 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -193,7 +193,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match opts_match[] = {
+static struct xt_match opts_match[] __read_mostly = {
 	{
 		.name		= "hbh",
 		.family		= AF_INET6,
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index ddee088..ca29ec0 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -49,7 +49,7 @@ static bool match(const struct sk_buff *skb,
 	return false;
 }
 
-static struct xt_match hl_match = {
+static struct xt_match hl_match __read_mostly = {
 	.name		= "hl",
 	.family		= AF_INET6,
 	.match		= match,
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index ca020ce..2c65c2f 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -141,7 +141,7 @@ ipv6header_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match ip6t_ipv6header_match = {
+static struct xt_match ip6t_ipv6header_match __read_mostly = {
 	.name		= "ipv6header",
 	.family		= AF_INET6,
 	.match		= &ipv6header_match,
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index e94fdd8..0fa7140 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -89,7 +89,7 @@ mh_checkentry(const char *tablename,
 	return !(mhinfo->invflags & ~IP6T_MH_INV_MASK);
 }
 
-static struct xt_match mh_match = {
+static struct xt_match mh_match __read_mostly = {
 	.name		= "mh",
 	.family		= AF_INET6,
 	.checkentry	= mh_checkentry,
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index d2bf320..6036613 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -68,7 +68,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match owner_match = {
+static struct xt_match owner_match __read_mostly = {
 	.name		= "owner",
 	.family		= AF_INET6,
 	.match		= match,
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index f86fdcd..549deea 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -224,7 +224,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match rt_match = {
+static struct xt_match rt_match __read_mostly = {
 	.name		= "rt",
 	.family		= AF_INET6,
 	.match		= match,
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 3088483..5194285 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -39,7 +39,7 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static struct xt_target xt_classify_target[] = {
+static struct xt_target xt_classify_target[] __read_mostly = {
 	{
 		.family		= AF_INET,
 		.name 		= "CLASSIFY",
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 4284a59..5a00c54 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -142,7 +142,7 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_target xt_connmark_target[] = {
+static struct xt_target xt_connmark_target[] __read_mostly = {
 	{
 		.name		= "CONNMARK",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 8d5e154..63d7313 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -115,7 +115,7 @@ destroy(const struct xt_target *target, void *targinfo)
 	nf_ct_l3proto_module_put(target->family);
 }
 
-static struct xt_target xt_connsecmark_target[] = {
+static struct xt_target xt_connsecmark_target[] __read_mostly = {
 	{
 		.name		= "CONNSECMARK",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index ed6b524..798ab73 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -81,7 +81,7 @@ static bool checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target xt_dscp_target[] = {
+static struct xt_target xt_dscp_target[] __read_mostly = {
 	{
 		.name		= "DSCP",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 6b7369f..f30fe0b 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -133,7 +133,7 @@ static int compat_to_user_v1(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_target xt_mark_target[] = {
+static struct xt_target xt_mark_target[] __read_mostly = {
 	{
 		.name		= "MARK",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 20e55d5..d3594c7 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -52,7 +52,7 @@ nflog_checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_target xt_nflog_target[] = {
+static struct xt_target xt_nflog_target[] __read_mostly = {
 	{
 		.name		= "NFLOG",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 201155b..13f59f3 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -36,7 +36,7 @@ target(struct sk_buff **pskb,
 	return NF_QUEUE_NR(tinfo->queuenum);
 }
 
-static struct xt_target xt_nfqueue_target[] = {
+static struct xt_target xt_nfqueue_target[] __read_mostly = {
 	{
 		.name		= "NFQUEUE",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 5085fb3..b7d6312 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -33,7 +33,7 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static struct xt_target xt_notrack_target[] = {
+static struct xt_target xt_notrack_target[] __read_mostly = {
 	{
 		.name		= "NOTRACK",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index f3e78c5..c83779a 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -109,7 +109,7 @@ static bool checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_target xt_secmark_target[] = {
+static struct xt_target xt_secmark_target[] __read_mostly = {
 	{
 		.name		= "SECMARK",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 6ae6df9..d40f7e4 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -259,7 +259,7 @@ xt_tcpmss_checkentry6(const char *tablename,
 }
 #endif
 
-static struct xt_target xt_tcpmss_reg[] = {
+static struct xt_target xt_tcpmss_reg[] __read_mostly = {
 	{
 		.family		= AF_INET,
 		.name		= "TCPMSS",
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index b82fc46..4df2ded 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -21,7 +21,7 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static struct xt_target xt_trace_target[] = {
+static struct xt_target xt_trace_target[] __read_mostly = {
 	{
 		.name		= "TRACE",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index aa9503f..64bcdb0 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -29,7 +29,7 @@ match(const struct sk_buff *skb,
 	return true;
 }
 
-static struct xt_match xt_comment_match[] = {
+static struct xt_match xt_comment_match[] __read_mostly = {
 	{
 		.name		= "comment",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index d9b2e75..dd4d79b 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -128,7 +128,7 @@ destroy(const struct xt_match *match, void *matchinfo)
 	nf_ct_l3proto_module_put(match->family);
 }
 
-static struct xt_match xt_connbytes_match[] = {
+static struct xt_match xt_connbytes_match[] __read_mostly = {
 	{
 		.name		= "connbytes",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 3a6e16d..e73fa9b 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -109,7 +109,7 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_match xt_connmark_match[] = {
+static struct xt_match xt_connmark_match[] __read_mostly = {
 	{
 		.name		= "connmark",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 9e3ec31..ca4b69f 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -183,7 +183,7 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif
 
-static struct xt_match conntrack_match = {
+static struct xt_match conntrack_match __read_mostly = {
 	.name		= "conntrack",
 	.match		= match,
 	.checkentry	= checkentry,
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index f07a68d..83224ec 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -140,7 +140,7 @@ checkentry(const char *tablename,
 		&& !(info->invflags & ~info->flags);
 }
 
-static struct xt_match xt_dccp_match[] = {
+static struct xt_match xt_dccp_match[] __read_mostly = {
 	{
 		.name 		= "dccp",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 35cabca..dde6d66 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -68,7 +68,7 @@ static bool checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match xt_dscp_match[] = {
+static struct xt_match xt_dscp_match[] __read_mostly = {
 	{
 		.name		= "dscp",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 1a6ae8a..b11378e 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -91,7 +91,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match xt_esp_match[] = {
+static struct xt_match xt_esp_match[] __read_mostly = {
 	{
 		.name		= "esp",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 094da6e..5a6ea9b 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -578,7 +578,7 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif
 
-static struct xt_match xt_hashlimit[] = {
+static struct xt_match xt_hashlimit[] __read_mostly = {
 	{
 		.name		= "hashlimit",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 047d004..d03acb0 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -99,7 +99,7 @@ destroy(const struct xt_match *match, void *matchinfo)
 	nf_ct_l3proto_module_put(match->family);
 }
 
-static struct xt_match xt_helper_match[] = {
+static struct xt_match xt_helper_match[] __read_mostly = {
 	{
 		.name		= "helper",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index ea4880b..3dad173 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -53,7 +53,7 @@ match6(const struct sk_buff *skb,
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
 
-static struct xt_match xt_length_match[] = {
+static struct xt_match xt_length_match[] __read_mostly = {
 	{
 		.name		= "length",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index b042419..4fcca79 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -173,7 +173,7 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_match xt_limit_match[] = {
+static struct xt_match xt_limit_match[] __read_mostly = {
 	{
 		.name		= "limit",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 28ec08e..00490d7 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -44,7 +44,7 @@ match(const struct sk_buff *skb,
 		^ info->invert);
 }
 
-static struct xt_match xt_mac_match[] = {
+static struct xt_match xt_mac_match[] __read_mostly = {
 	{
 		.name		= "mac",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index b8ab794..c02a7f8 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -81,7 +81,7 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_match xt_mark_match[] = {
+static struct xt_match xt_mark_match[] __read_mostly = {
 	{
 		.name		= "mark",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 3d69d62..e8ae102 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -228,7 +228,7 @@ checkentry6_v1(const char *tablename,
 		     multiinfo->count);
 }
 
-static struct xt_match xt_multiport_match[] = {
+static struct xt_match xt_multiport_match[] __read_mostly = {
 	{
 		.name		= "multiport",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 467b2dc..f47cab7a6 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -125,7 +125,7 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match xt_physdev_match[] = {
+static struct xt_match xt_physdev_match[] __read_mostly = {
 	{
 		.name		= "physdev",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index e4c420b..a52925f 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -43,7 +43,7 @@ static bool match(const struct sk_buff *skb,
 	return (type == info->pkttype) ^ info->invert;
 }
 
-static struct xt_match xt_pkttype_match[] = {
+static struct xt_match xt_pkttype_match[] __read_mostly = {
 	{
 		.name		= "pkttype",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 5ab6d71..6d6d3b7 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -164,7 +164,7 @@ static bool checkentry(const char *tablename, const void *ip_void,
 	return true;
 }
 
-static struct xt_match xt_policy_match[] = {
+static struct xt_match xt_policy_match[] __read_mostly = {
 	{
 		.name		= "policy",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index feb130d..dae9744 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -53,7 +53,7 @@ checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match xt_quota_match[] = {
+static struct xt_match xt_quota_match[] __read_mostly = {
 	{
 		.name		= "quota",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 44b807d..cc3e76d 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -37,7 +37,7 @@ match(const struct sk_buff *skb,
 	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
 }
 
-static struct xt_match realm_match = {
+static struct xt_match realm_match __read_mostly = {
 	.name		= "realm",
 	.match		= match,
 	.matchsize	= sizeof(struct xt_realm_info),
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index fefc846..c002153 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -172,7 +172,7 @@ checkentry(const char *tablename,
 				| SCTP_CHUNK_MATCH_ONLY)));
 }
 
-static struct xt_match xt_sctp_match[] = {
+static struct xt_match xt_sctp_match[] __read_mostly = {
 	{
 		.name		= "sctp",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 5b9c59a..e0a528d 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -64,7 +64,7 @@ destroy(const struct xt_match *match, void *matchinfo)
 	nf_ct_l3proto_module_put(match->family);
 }
 
-static struct xt_match xt_state_match[] = {
+static struct xt_match xt_state_match[] __read_mostly = {
 	{
 		.name		= "state",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 3da4978..4089dae 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -66,7 +66,7 @@ checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match xt_statistic_match[] = {
+static struct xt_match xt_statistic_match[] __read_mostly = {
 	{
 		.name		= "statistic",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index ab761b1..8641334 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -73,7 +73,7 @@ static void destroy(const struct xt_match *match, void *matchinfo)
 	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
 }
 
-static struct xt_match xt_string_match[] = {
+static struct xt_match xt_string_match[] __read_mostly = {
 	{
 		.name 		= "string",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index e9bfd3d..cd5f6d7 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -81,7 +81,7 @@ dropit:
 	return false;
 }
 
-static struct xt_match xt_tcpmss_match[] = {
+static struct xt_match xt_tcpmss_match[] __read_mostly = {
 	{
 		.name		= "tcpmss",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 5cb345a..ab7d845 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -194,7 +194,7 @@ udp_checkentry(const char *tablename,
 	return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
 }
 
-static struct xt_match xt_tcpudp_match[] = {
+static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name		= "tcp",
 		.family		= AF_INET,
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 0706875..04b677a 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -99,7 +99,7 @@ static bool u32_match(const struct sk_buff *skb,
 	return ret ^ data->invert;
 }
 
-static struct xt_match u32_reg[] = {
+static struct xt_match u32_reg[] __read_mostly = {
 	{
 		.name       = "u32",
 		.family     = AF_INET,
-- 
cgit v0.10.2


From 4ba887790ce2015e8c464809c0be902fb813ad15 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:22:33 -0700
Subject: [NETFILTER]: nf_nat: move NAT declarations from nf_conntrack_ipv4.h
 to nf_nat.h

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 1401ccc..3ed4e14 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -9,29 +9,8 @@
 #ifndef _NF_CONNTRACK_IPV4_H
 #define _NF_CONNTRACK_IPV4_H
 
-#ifdef CONFIG_NF_NAT_NEEDED
-#include <net/netfilter/nf_nat.h>
-#include <linux/netfilter/nf_conntrack_pptp.h>
-
-/* per conntrack: nat application helper private data */
-union nf_conntrack_nat_help {
-        /* insert nat helper private data here */
-	struct nf_nat_pptp nat_pptp_info;
-};
-
-struct nf_conn_nat {
-	struct nf_nat_info info;
-	union nf_conntrack_nat_help help;
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
-	int masq_index;
-#endif
-};
-#endif /* CONFIG_NF_NAT_NEEDED */
-
 /* Returns new sk_buff, or NULL */
-struct sk_buff *
-nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb);
+struct sk_buff *nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb);
 
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 4732432..12a0e79 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -280,6 +280,7 @@ nf_conntrack_unregister_cache(u_int32_t features);
  * nat: nf_conn .. nf_conn_nat, nf_conn .. nf_conn_nat .. nf_conn help
  */
 #ifdef CONFIG_NF_NAT_NEEDED
+#include <net/netfilter/nf_nat.h>
 static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 {
 	unsigned int offset = sizeof(struct nf_conn);
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index bc57dd7..47d3dc1 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -51,6 +51,7 @@ struct nf_nat_multi_range_compat
 
 #ifdef __KERNEL__
 #include <linux/list.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
 
 /* The structure embedded in the conntrack structure. */
 struct nf_nat_info
@@ -59,6 +60,23 @@ struct nf_nat_info
 	struct nf_nat_seq seq[IP_CT_DIR_MAX];
 };
 
+/* per conntrack: nat application helper private data */
+union nf_conntrack_nat_help
+{
+	/* insert nat helper private data here */
+	struct nf_nat_pptp nat_pptp_info;
+};
+
+struct nf_conn_nat
+{
+	struct nf_nat_info info;
+	union nf_conntrack_nat_help help;
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+	int masq_index;
+#endif
+};
+
 struct nf_conn;
 
 /* Set up the info structure to map into this range. */
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index 9778ffa..c3cd127 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -2,6 +2,7 @@
 #define _NF_NAT_CORE_H
 #include <linux/list.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
 
 /* This header used to share core functionality between the standalone
    NAT module, and the compatibility layer's use of NAT for masquerading. */
-- 
cgit v0.10.2


From ecfab2c9fe5597221c2b30dec48634a2361a0d08 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:23:21 -0700
Subject: [NETFILTER]: nf_conntrack: introduce extension infrastructure

The old space allocator of conntrack had extensibility problems:
- It required a slab cache per combination of extensions.
- It had to predict which extensions would be assigned, but that was
  impossible to predict completely, so we allocated a bigger memory
  object than really required.
- It needed to search for the helper twice due to a locking issue.

Now the basic information of a connection is stored in 'struct nf_conn',
and the storage for extensions (helper, NAT) is allocated by kmalloc.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 12a0e79..c31382d 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -131,6 +131,9 @@ struct nf_conn
 	/* Storage reserved for other modules: */
 	union nf_conntrack_proto proto;
 
+	/* Extensions */
+	struct nf_ct_ext *ext;
+
 	/* features dynamically at the end: helper, nat (both optional) */
 	char data[0];
 };
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
new file mode 100644
index 0000000..8a988d1
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -0,0 +1,80 @@
+#ifndef _NF_CONNTRACK_EXTEND_H
+#define _NF_CONNTRACK_EXTEND_H
+
+#include <net/netfilter/nf_conntrack.h>
+
+enum nf_ct_ext_id
+{
+	NF_CT_EXT_NUM,
+};
+
+/* Extensions: optional stuff which isn't permanently in struct. */
+struct nf_ct_ext {
+	u8 offset[NF_CT_EXT_NUM];
+	u8 len;
+	u8 real_len;
+	char data[0];
+};
+
+static inline int nf_ct_ext_exist(const struct nf_conn *ct, u8 id)
+{
+	return (ct->ext && ct->ext->offset[id]);
+}
+
+static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id)
+{
+	if (!nf_ct_ext_exist(ct, id))
+		return NULL;
+
+	return (void *)ct->ext + ct->ext->offset[id];
+}
+#define nf_ct_ext_find(ext, id)	\
+	((id##_TYPE *)__nf_ct_ext_find((ext), (id)))
+
+/* Destroy all relationships */
+extern void __nf_ct_ext_destroy(struct nf_conn *ct);
+static inline void nf_ct_ext_destroy(struct nf_conn *ct)
+{
+	if (ct->ext)
+		__nf_ct_ext_destroy(ct);
+}
+
+/* Free operation. If you want to free a object referred from private area,
+ * please implement __nf_ct_ext_free() and call it.
+ */
+static inline void nf_ct_ext_free(struct nf_conn *ct)
+{
+	if (ct->ext)
+		kfree(ct->ext);
+}
+
+/* Add this type, returns pointer to data or NULL. */
+void *
+__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp);
+#define nf_ct_ext_add(ct, id, gfp) \
+	((id##_TYPE *)__nf_ct_ext_add((ct), (id), (gfp)))
+
+#define NF_CT_EXT_F_PREALLOC	0x0001
+
+struct nf_ct_ext_type
+{
+	/* Destroys relationships (can be NULL). */
+	void (*destroy)(struct nf_conn *ct);
+	/* Called when realloacted (can be NULL).
+	   Contents has already been moved. */
+	void (*move)(struct nf_conn *ct, void *old);
+
+	enum nf_ct_ext_id id;
+
+	unsigned int flags;
+
+	/* Length and min alignment. */
+	u8 len;
+	u8 align;
+	/* initial size of nf_ct_ext. */
+	u8 alloc_size;
+};
+
+int nf_ct_extend_register(struct nf_ct_ext_type *type);
+void nf_ct_extend_unregister(struct nf_ct_ext_type *type);
+#endif /* _NF_CONNTRACK_EXTEND_H */
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3b79268..58b4245 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,6 @@
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
 
-nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o
+nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7a15e30..b56f954 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -36,6 +36,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
@@ -317,6 +318,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	if (l4proto && l4proto->destroy)
 		l4proto->destroy(ct);
 
+	nf_ct_ext_destroy(ct);
+
 	destroyed = rcu_dereference(nf_conntrack_destroyed);
 	if (destroyed)
 		destroyed(ct);
@@ -650,6 +653,7 @@ void nf_conntrack_free(struct nf_conn *conntrack)
 {
 	u_int32_t features = conntrack->features;
 	NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
+	nf_ct_ext_free(conntrack);
 	DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
 	       conntrack);
 	kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
new file mode 100644
index 0000000..a1a65a1
--- /dev/null
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -0,0 +1,195 @@
+/* Structure dynamic extension infrastructure
+ * Copyright (C) 2004 Rusty Russell IBM Corporation
+ * Copyright (C) 2007 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2007 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM];
+static DEFINE_MUTEX(nf_ct_ext_type_mutex);
+
+/* Horrible trick to figure out smallest amount worth kmallocing. */
+#define CACHE(x) (x) + 0 *
+enum {
+	NF_CT_EXT_MIN_SIZE =
+#include <linux/kmalloc_sizes.h>
+	1 };
+#undef CACHE
+
+void __nf_ct_ext_destroy(struct nf_conn *ct)
+{
+	unsigned int i;
+	struct nf_ct_ext_type *t;
+
+	for (i = 0; i < NF_CT_EXT_NUM; i++) {
+		if (!nf_ct_ext_exist(ct, i))
+			continue;
+
+		rcu_read_lock();
+		t = rcu_dereference(nf_ct_ext_types[i]);
+
+		/* Here the nf_ct_ext_type might have been unregisterd.
+		 * I.e., it has responsible to cleanup private
+		 * area in all conntracks when it is unregisterd.
+		 */
+		if (t && t->destroy)
+			t->destroy(ct);
+		rcu_read_unlock();
+	}
+}
+EXPORT_SYMBOL(__nf_ct_ext_destroy);
+
+static void *
+nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
+{
+	unsigned int off, len, real_len;
+	struct nf_ct_ext_type *t;
+
+	rcu_read_lock();
+	t = rcu_dereference(nf_ct_ext_types[id]);
+	BUG_ON(t == NULL);
+	off = ALIGN(sizeof(struct nf_ct_ext), t->align);
+	len = off + t->len;
+	real_len = t->alloc_size;
+	rcu_read_unlock();
+
+	*ext = kzalloc(real_len, gfp);
+	if (!*ext)
+		return NULL;
+
+	(*ext)->offset[id] = off;
+	(*ext)->len = len;
+	(*ext)->real_len = real_len;
+
+	return (void *)(*ext) + off;
+}
+
+void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
+{
+	struct nf_ct_ext *new;
+	int i, newlen, newoff;
+	struct nf_ct_ext_type *t;
+
+	if (!ct->ext)
+		return nf_ct_ext_create(&ct->ext, id, gfp);
+
+	if (nf_ct_ext_exist(ct, id))
+		return NULL;
+
+	rcu_read_lock();
+	t = rcu_dereference(nf_ct_ext_types[id]);
+	BUG_ON(t == NULL);
+
+	newoff = ALIGN(ct->ext->len, t->align);
+	newlen = newoff + t->len;
+	rcu_read_unlock();
+
+	if (newlen >= ct->ext->real_len) {
+		new = kmalloc(newlen, gfp);
+		if (!new)
+			return NULL;
+
+		memcpy(new, ct->ext, ct->ext->len);
+
+		for (i = 0; i < NF_CT_EXT_NUM; i++) {
+			if (!nf_ct_ext_exist(ct, i))
+				continue;
+
+			rcu_read_lock();
+			t = rcu_dereference(nf_ct_ext_types[i]);
+			if (t && t->move)
+				t->move(ct, ct->ext + ct->ext->offset[id]);
+			rcu_read_unlock();
+		}
+		kfree(ct->ext);
+		new->real_len = newlen;
+		ct->ext = new;
+	}
+
+	ct->ext->offset[id] = newoff;
+	ct->ext->len = newlen;
+	memset((void *)ct->ext + newoff, 0, newlen - newoff);
+	return (void *)ct->ext + newoff;
+}
+EXPORT_SYMBOL(__nf_ct_ext_add);
+
+static void update_alloc_size(struct nf_ct_ext_type *type)
+{
+	int i, j;
+	struct nf_ct_ext_type *t1, *t2;
+	enum nf_ct_ext_id min = 0, max = NF_CT_EXT_NUM - 1;
+
+	/* unnecessary to update all types */
+	if ((type->flags & NF_CT_EXT_F_PREALLOC) == 0) {
+		min = type->id;
+		max = type->id;
+	}
+
+	/* This assumes that extended areas in conntrack for the types
+	   whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
+	for (i = min; i <= max; i++) {
+		t1 = nf_ct_ext_types[i];
+		if (!t1)
+			continue;
+
+		t1->alloc_size = sizeof(struct nf_ct_ext)
+				 + ALIGN(sizeof(struct nf_ct_ext), t1->align)
+				 + t1->len;
+		for (j = 0; j < NF_CT_EXT_NUM; j++) {
+			t2 = nf_ct_ext_types[j];
+			if (t2 == NULL || t2 == t1 ||
+			    (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
+				continue;
+
+			t1->alloc_size = ALIGN(t1->alloc_size, t2->align)
+					 + t2->len;
+		}
+		if (t1->alloc_size < NF_CT_EXT_MIN_SIZE)
+			t1->alloc_size = NF_CT_EXT_MIN_SIZE;
+	}
+}
+
+/* This MUST be called in process context. */
+int nf_ct_extend_register(struct nf_ct_ext_type *type)
+{
+	int ret = 0;
+
+	mutex_lock(&nf_ct_ext_type_mutex);
+	if (nf_ct_ext_types[type->id]) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* This ensures that nf_ct_ext_create() can allocate enough area
+	   before updating alloc_size */
+	type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align)
+			   + type->len;
+	rcu_assign_pointer(nf_ct_ext_types[type->id], type);
+	update_alloc_size(type);
+out:
+	mutex_unlock(&nf_ct_ext_type_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_extend_register);
+
+/* This MUST be called in process context. */
+void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
+{
+	mutex_lock(&nf_ct_ext_type_mutex);
+	rcu_assign_pointer(nf_ct_ext_types[type->id], NULL);
+	update_alloc_size(type);
+	mutex_unlock(&nf_ct_ext_type_mutex);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);
-- 
cgit v0.10.2


From ceceae1b1555a9afcb8dacf90df5fa1f20fd5466 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:23:42 -0700
Subject: [NETFILTER]: nf_conntrack: use extension infrastructure for helper

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index c31382d..f1e0fee 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -294,32 +294,6 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 	offset = ALIGN(offset, __alignof__(struct nf_conn_nat));
 	return (struct nf_conn_nat *) ((void *)ct + offset);
 }
-
-static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
-{
-	unsigned int offset = sizeof(struct nf_conn);
-
-	if (!(ct->features & NF_CT_F_HELP))
-		return NULL;
-	if (ct->features & NF_CT_F_NAT) {
-		offset = ALIGN(offset, __alignof__(struct nf_conn_nat));
-		offset += sizeof(struct nf_conn_nat);
-	}
-
-	offset = ALIGN(offset, __alignof__(struct nf_conn_help));
-	return (struct nf_conn_help *) ((void *)ct + offset);
-}
-#else /* No NAT */
-static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
-{
-	unsigned int offset = sizeof(struct nf_conn);
-
-	if (!(ct->features & NF_CT_F_HELP))
-		return NULL;
-
-	offset = ALIGN(offset, __alignof__(struct nf_conn_help));
-	return (struct nf_conn_help *) ((void *)ct + offset);
-}
 #endif /* CONFIG_NF_NAT_NEEDED */
 #endif /* __KERNEL__ */
 #endif /* _NF_CONNTRACK_H */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 9fb9066..3bf7d05 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -30,6 +30,9 @@ extern void nf_conntrack_cleanup(void);
 extern int nf_conntrack_proto_init(void);
 extern void nf_conntrack_proto_fini(void);
 
+extern int nf_conntrack_helper_init(void);
+extern void nf_conntrack_helper_fini(void);
+
 struct nf_conntrack_l3proto;
 extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf);
 /* Like above, but you already have conntrack read lock. */
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 8a988d1..05357dc 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -5,9 +5,12 @@
 
 enum nf_ct_ext_id
 {
+	NF_CT_EXT_HELPER,
 	NF_CT_EXT_NUM,
 };
 
+#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
+
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
 	u8 offset[NF_CT_EXT_NUM];
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 8c72ac9..b43a75b 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -10,6 +10,7 @@
 #ifndef _NF_CONNTRACK_HELPER_H
 #define _NF_CONNTRACK_HELPER_H
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 
 struct module;
 
@@ -52,4 +53,8 @@ extern void nf_ct_helper_put(struct nf_conntrack_helper *helper);
 extern int nf_conntrack_helper_register(struct nf_conntrack_helper *);
 extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
 
+static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
+{
+	return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
+}
 #endif /*_NF_CONNTRACK_HELPER_H*/
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 55dac36..0b2f0c3 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -338,14 +338,6 @@ static int __init nf_nat_standalone_init(void)
 		return ret;
 	}
 
-	size = ALIGN(size, __alignof__(struct nf_conn_help)) +
-	       sizeof(struct nf_conn_help);
-	ret = nf_conntrack_register_cache(NF_CT_F_NAT|NF_CT_F_HELP,
-					  "nf_nat:help", size);
-	if (ret < 0) {
-		printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n");
-		goto cleanup_register_cache;
-	}
 #ifdef CONFIG_XFRM
 	BUG_ON(ip_nat_decode_session != NULL);
 	ip_nat_decode_session = nat_decode_session;
@@ -370,8 +362,6 @@ static int __init nf_nat_standalone_init(void)
 	ip_nat_decode_session = NULL;
 	synchronize_net();
 #endif
-	nf_conntrack_unregister_cache(NF_CT_F_NAT|NF_CT_F_HELP);
- cleanup_register_cache:
 	nf_conntrack_unregister_cache(NF_CT_F_NAT);
 	return ret;
 }
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b56f954..914506e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -566,7 +566,6 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 		     u_int32_t features)
 {
 	struct nf_conn *conntrack = NULL;
-	struct nf_conntrack_helper *helper;
 
 	if (unlikely(!nf_conntrack_hash_rnd_initted)) {
 		get_random_bytes(&nf_conntrack_hash_rnd, 4);
@@ -593,14 +592,6 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	/*  find features needed by this conntrack. */
 	features |= l3proto->get_features(orig);
 
-	/* FIXME: protect helper list per RCU */
-	read_lock_bh(&nf_conntrack_lock);
-	helper = __nf_ct_helper_find(repl);
-	/* NAT might want to assign a helper later */
-	if (helper || features & NF_CT_F_NAT)
-		features |= NF_CT_F_HELP;
-	read_unlock_bh(&nf_conntrack_lock);
-
 	DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);
 
 	read_lock_bh(&nf_ct_cache_lock);
@@ -681,12 +672,6 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		return NULL;
 	}
 
-	read_lock_bh(&nf_conntrack_lock);
-	exp = __nf_conntrack_expect_find(tuple);
-	if (exp && exp->helper)
-		features = NF_CT_F_HELP;
-	read_unlock_bh(&nf_conntrack_lock);
-
 	conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto, features);
 	if (conntrack == NULL || IS_ERR(conntrack)) {
 		DEBUGP("Can't allocate conntrack.\n");
@@ -701,16 +686,21 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 
 	write_lock_bh(&nf_conntrack_lock);
 	exp = find_expectation(tuple);
-
-	help = nfct_help(conntrack);
 	if (exp) {
 		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
 			conntrack, exp);
 		/* Welcome, Mr. Bond.  We've been expecting you... */
 		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
 		conntrack->master = exp->master;
-		if (exp->helper)
-			rcu_assign_pointer(help->helper, exp->helper);
+		if (exp->helper) {
+			help = nf_ct_ext_add(conntrack, NF_CT_EXT_HELPER,
+					     GFP_ATOMIC);
+			if (help)
+				rcu_assign_pointer(help->helper, exp->helper);
+			else
+				DEBUGP("failed to add helper extension area");
+		}
+
 #ifdef CONFIG_NF_CONNTRACK_MARK
 		conntrack->mark = exp->master->mark;
 #endif
@@ -720,10 +710,18 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		nf_conntrack_get(&conntrack->master->ct_general);
 		NF_CT_STAT_INC(expect_new);
 	} else {
-		if (help) {
-			/* not in hash table yet, so not strictly necessary */
-			rcu_assign_pointer(help->helper,
-					   __nf_ct_helper_find(&repl_tuple));
+		struct nf_conntrack_helper *helper;
+
+		helper = __nf_ct_helper_find(&repl_tuple);
+		if (helper) {
+			help = nf_ct_ext_add(conntrack, NF_CT_EXT_HELPER,
+					     GFP_ATOMIC);
+			if (help)
+				/* not in hash table yet, so not strictly
+				   necessary */
+				rcu_assign_pointer(help->helper, helper);
+			else
+				DEBUGP("failed to add helper extension area");
 		}
 		NF_CT_STAT_INC(new);
 	}
@@ -892,6 +890,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 			      const struct nf_conntrack_tuple *newreply)
 {
 	struct nf_conn_help *help = nfct_help(ct);
+	struct nf_conntrack_helper *helper;
 
 	write_lock_bh(&nf_conntrack_lock);
 	/* Should be unconfirmed, so not in hash table yet */
@@ -901,14 +900,28 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 	NF_CT_DUMP_TUPLE(newreply);
 
 	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
-	if (!ct->master && help && help->expecting == 0) {
-		struct nf_conntrack_helper *helper;
-		helper = __nf_ct_helper_find(newreply);
-		if (helper)
-			memset(&help->help, 0, sizeof(help->help));
-		/* not in hash table yet, so not strictly necessary */
-		rcu_assign_pointer(help->helper, helper);
+	if (ct->master || (help && help->expecting != 0))
+		goto out;
+
+	helper = __nf_ct_helper_find(newreply);
+	if (helper == NULL) {
+		if (help)
+			rcu_assign_pointer(help->helper, NULL);
+		goto out;
 	}
+
+	if (help == NULL) {
+		help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_ATOMIC);
+		if (help == NULL) {
+			DEBUGP("failed to add helper extension area");
+			goto out;
+		}
+	} else {
+		memset(&help->help, 0, sizeof(help->help));
+	}
+
+	rcu_assign_pointer(help->helper, helper);
+out:
 	write_unlock_bh(&nf_conntrack_lock);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
@@ -1150,6 +1163,7 @@ void nf_conntrack_cleanup(void)
 			    nf_conntrack_htable_size);
 
 	nf_conntrack_proto_fini();
+	nf_conntrack_helper_fini();
 }
 
 static struct list_head *alloc_hashtable(int size, int *vmalloced)
@@ -1272,6 +1286,10 @@ int __init nf_conntrack_init(void)
 	if (ret < 0)
 		goto out_free_expect_slab;
 
+	ret = nf_conntrack_helper_init();
+	if (ret < 0)
+		goto out_fini_proto;
+
 	/* For use by REJECT target */
 	rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
 	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
@@ -1284,6 +1302,8 @@ int __init nf_conntrack_init(void)
 
 	return ret;
 
+out_fini_proto:
+	nf_conntrack_proto_fini();
 out_free_expect_slab:
 	kmem_cache_destroy(nf_conntrack_expect_cachep);
 err_free_conntrack_slab:
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index f868b7f..6d32399 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -26,6 +26,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 
 static __read_mostly LIST_HEAD(helpers);
 
@@ -100,18 +101,8 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 
 int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 {
-	int size, ret;
-
 	BUG_ON(me->timeout == 0);
 
-	size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_help)) +
-	       sizeof(struct nf_conn_help);
-	ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
-					  size);
-	if (ret < 0) {
-		printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
-		return ret;
-	}
 	write_lock_bh(&nf_conntrack_lock);
 	list_add(&me->list, &helpers);
 	write_unlock_bh(&nf_conntrack_lock);
@@ -153,3 +144,19 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	synchronize_net();
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
+
+struct nf_ct_ext_type helper_extend = {
+	.len	= sizeof(struct nf_conn_help),
+	.align	= __alignof__(struct nf_conn_help),
+	.id	= NF_CT_EXT_HELPER,
+};
+
+int nf_conntrack_helper_init()
+{
+	return nf_ct_extend_register(&helper_extend);
+}
+
+void nf_conntrack_helper_fini()
+{
+	nf_ct_extend_unregister(&helper_extend);
+}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d0fe3d7..3d56f36 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -856,23 +856,23 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[])
 		return 0;
 	}
 
-	if (!help) {
-		/* FIXME: we need to reallocate and rehash */
-		return -EBUSY;
-	}
-
 	helper = __nf_conntrack_helper_find_byname(helpname);
 	if (helper == NULL)
 		return -EINVAL;
 
-	if (help->helper == helper)
-		return 0;
-
-	if (help->helper)
-		return -EBUSY;
+	if (help) {
+		if (help->helper == helper)
+			return 0;
+		if (help->helper)
+			return -EBUSY;
+		/* need to zero data of old helper */
+		memset(&help->help, 0, sizeof(help->help));
+	} else {
+		help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_KERNEL);
+		if (help == NULL)
+			return -ENOMEM;
+	}
 
-	/* need to zero data of old helper */
-	memset(&help->help, 0, sizeof(help->help));
 	rcu_assign_pointer(help->helper, helper);
 
 	return 0;
@@ -957,7 +957,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 	struct nf_conn *ct;
 	int err = -EINVAL;
 	struct nf_conn_help *help;
-	struct nf_conntrack_helper *helper = NULL;
+	struct nf_conntrack_helper *helper;
 
 	ct = nf_conntrack_alloc(otuple, rtuple);
 	if (ct == NULL || IS_ERR(ct))
@@ -987,9 +987,14 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 		ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
 #endif
 
-	help = nfct_help(ct);
-	if (help) {
-		helper = nf_ct_helper_find_get(rtuple);
+	helper = nf_ct_helper_find_get(rtuple);
+	if (helper) {
+		help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_KERNEL);
+		if (help == NULL) {
+			nf_ct_helper_put(helper);
+			err = -ENOMEM;
+			goto err;
+		}
 		/* not in hash table yet so not strictly necessary */
 		rcu_assign_pointer(help->helper, helper);
 	}
-- 
cgit v0.10.2


From e54cbc1f91dea4f98b6209e693d3b5eae46321bd Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:24:04 -0700
Subject: [NETFILTER]: nf_nat: add reference to conntrack from entry of
 bysource list

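'struct nf_nat_info' is going to be split out from the conntrack, so
'offsetof' can no longer be used to get the pointer to the conntrack
from it. Store an explicit pointer to the conntrack in each entry of
the bysource list instead.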

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 47d3dc1..575dc8a 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -53,11 +53,14 @@ struct nf_nat_multi_range_compat
 #include <linux/list.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
+struct nf_conn;
+
 /* The structure embedded in the conntrack structure. */
 struct nf_nat_info
 {
 	struct list_head bysource;
 	struct nf_nat_seq seq[IP_CT_DIR_MAX];
+	struct nf_conn *ct;
 };
 
 /* per conntrack: nat application helper private data */
@@ -77,8 +80,6 @@ struct nf_conn_nat
 #endif
 };
 
-struct nf_conn;
-
 /* Set up the info structure to map into this range. */
 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
 				      const struct nf_nat_range *range,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index ea02f00..ac7e8abb 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -97,6 +97,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *conn)
 	nat = nfct_nat(conn);
 	write_lock_bh(&nf_nat_lock);
 	list_del(&nat->info.bysource);
+	nat->info.ct = NULL;
 	write_unlock_bh(&nf_nat_lock);
 }
 
@@ -169,7 +170,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
 
 	read_lock_bh(&nf_nat_lock);
 	list_for_each_entry(nat, &bysource[h], info.bysource) {
-		ct = (struct nf_conn *)((char *)nat - offsetof(struct nf_conn, data));
+		ct = nat->info.ct;
 		if (same_src(ct, tuple)) {
 			/* Copy source part from reply tuple. */
 			nf_ct_invert_tuplepr(result,
@@ -337,6 +338,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 		srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		write_lock_bh(&nf_nat_lock);
+		info->ct = ct;
 		list_add(&info->bysource, &bysource[srchash]);
 		write_unlock_bh(&nf_nat_lock);
 	}
-- 
cgit v0.10.2


From 2d59e5ca8c7113ad91452f0f9259a4b55ee90323 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:24:28 -0700
Subject: [NETFILTER]: nf_nat: use extension infrastructure

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f1e0fee..b2083d3 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -278,22 +278,5 @@ nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size);
 extern void
 nf_conntrack_unregister_cache(u_int32_t features);
 
-/* valid combinations:
- * basic: nf_conn, nf_conn .. nf_conn_help
- * nat: nf_conn .. nf_conn_nat, nf_conn .. nf_conn_nat .. nf_conn help
- */
-#ifdef CONFIG_NF_NAT_NEEDED
-#include <net/netfilter/nf_nat.h>
-static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
-{
-	unsigned int offset = sizeof(struct nf_conn);
-
-	if (!(ct->features & NF_CT_F_NAT))
-		return NULL;
-
-	offset = ALIGN(offset, __alignof__(struct nf_conn_nat));
-	return (struct nf_conn_nat *) ((void *)ct + offset);
-}
-#endif /* CONFIG_NF_NAT_NEEDED */
 #endif /* __KERNEL__ */
 #endif /* _NF_CONNTRACK_H */
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 05357dc..73b5711 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -6,10 +6,12 @@
 enum nf_ct_ext_id
 {
 	NF_CT_EXT_HELPER,
+	NF_CT_EXT_NAT,
 	NF_CT_EXT_NUM,
 };
 
 #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
+#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 575dc8a..0425e28 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -52,6 +52,7 @@ struct nf_nat_multi_range_compat
 #ifdef __KERNEL__
 #include <linux/list.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 
 struct nf_conn;
 
@@ -89,6 +90,11 @@ extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
 extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 			     const struct nf_conn *ignored_conntrack);
 
+static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
+{
+	return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+}
+
 extern int nf_nat_module_is_loaded;
 
 #else  /* !__KERNEL__: iptables wants this to compile. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 6dc72a8..96f641d 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -108,9 +108,6 @@ EXPORT_SYMBOL_GPL(nf_nat_module_is_loaded);
 
 static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
 {
-	if (nf_nat_module_is_loaded)
-		return NF_CT_F_NAT;
-
 	return NF_CT_F_BASIC;
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index ac7e8abb..4ce82d7 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -297,11 +297,21 @@ nf_nat_setup_info(struct nf_conn *ct,
 		  unsigned int hooknum)
 {
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_info *info = &nat->info;
+	struct nf_conn_nat *nat;
+	struct nf_nat_info *info;
 	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
 	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
 
+	/* nat helper or nfctnetlink also setup binding */
+	nat = nfct_nat(ct);
+	if (!nat) {
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+		if (nat == NULL) {
+			DEBUGP("failed to add NAT extension\n");
+			return NF_ACCEPT;
+		}
+	}
+
 	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
 		     hooknum == NF_IP_POST_ROUTING ||
 		     hooknum == NF_IP_LOCAL_IN ||
@@ -338,6 +348,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 		srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		write_lock_bh(&nf_nat_lock);
+		/* nf_conntrack_alter_reply might re-allocate exntension aera */
+		info = &nfct_nat(ct)->info;
 		info->ct = ct;
 		list_add(&info->bysource, &bysource[srchash]);
 		write_unlock_bh(&nf_nat_lock);
@@ -592,17 +604,52 @@ nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range)
 EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr);
 #endif
 
+static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
+{
+	struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT);
+	struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old;
+	struct nf_conn *ct = old_nat->info.ct;
+	unsigned int srchash;
+
+	if (!(ct->status & IPS_NAT_DONE_MASK))
+		return;
+
+	srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+
+	write_lock_bh(&nf_nat_lock);
+	list_replace(&old_nat->info.bysource, &new_nat->info.bysource);
+	new_nat->info.ct = ct;
+	write_unlock_bh(&nf_nat_lock);
+}
+
+struct nf_ct_ext_type nat_extend = {
+	.len	= sizeof(struct nf_conn_nat),
+	.align	= __alignof__(struct nf_conn_nat),
+	.move	= nf_nat_move_storage,
+	.id	= NF_CT_EXT_NAT,
+	.flags	= NF_CT_EXT_F_PREALLOC,
+};
+
 static int __init nf_nat_init(void)
 {
 	size_t i;
+	int ret;
+
+	ret = nf_ct_extend_register(&nat_extend);
+	if (ret < 0) {
+		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
+		return ret;
+	}
 
 	/* Leave them the same for the moment. */
 	nf_nat_htable_size = nf_conntrack_htable_size;
 
 	/* One vmalloc for both hash tables */
 	bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size);
-	if (!bysource)
-		return -ENOMEM;
+	if (!bysource) {
+		ret = -ENOMEM;
+		goto cleanup_extend;
+	}
 
 	/* Sew in builtin protocols. */
 	write_lock_bh(&nf_nat_lock);
@@ -626,6 +673,10 @@ static int __init nf_nat_init(void)
 
 	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
 	return 0;
+
+ cleanup_extend:
+	nf_ct_extend_unregister(&nat_extend);
+	return ret;
 }
 
 /* Clear NAT section of all conntracks, in case we're loaded again. */
@@ -647,6 +698,7 @@ static void __exit nf_nat_cleanup(void)
 	synchronize_rcu();
 	vfree(bysource);
 	nf_ct_l3proto_put(l3proto);
+	nf_ct_extend_unregister(&nat_extend);
 }
 
 MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 0b2f0c3..51a2708 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -19,6 +19,7 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_rule.h>
 #include <net/netfilter/nf_nat_protocol.h>
@@ -113,8 +114,13 @@ nf_nat_fn(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	nat = nfct_nat(ct);
-	if (!nat)
-		return NF_ACCEPT;
+	if (!nat) {
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+		if (nat == NULL) {
+			DEBUGP("failed to add NAT extension\n");
+			return NF_ACCEPT;
+		}
+	}
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:
@@ -326,18 +332,10 @@ static struct nf_hook_ops nf_nat_ops[] = {
 
 static int __init nf_nat_standalone_init(void)
 {
-	int size, ret = 0;
+	int ret = 0;
 
 	need_conntrack();
 
-	size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_nat)) +
-	       sizeof(struct nf_conn_nat);
-	ret = nf_conntrack_register_cache(NF_CT_F_NAT, "nf_nat:base", size);
-	if (ret < 0) {
-		printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n");
-		return ret;
-	}
-
 #ifdef CONFIG_XFRM
 	BUG_ON(ip_nat_decode_session != NULL);
 	ip_nat_decode_session = nat_decode_session;
@@ -362,7 +360,6 @@ static int __init nf_nat_standalone_init(void)
 	ip_nat_decode_session = NULL;
 	synchronize_net();
 #endif
-	nf_conntrack_unregister_cache(NF_CT_F_NAT);
 	return ret;
 }
 
-- 
cgit v0.10.2


From ff09b7493c8f433d3ffd6a31ad58d190f82ef0c5 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:25:28 -0700
Subject: [NETFILTER]: nf_nat: remove unused nf_nat_module_is_loaded

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 0425e28..0541eed 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -95,8 +95,6 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 	return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
 }
 
-extern int nf_nat_module_is_loaded;
-
 #else  /* !__KERNEL__: iptables wants this to compile. */
 #define nf_nat_multi_range nf_nat_multi_range_compat
 #endif /*__KERNEL__*/
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 96f641d..7411dd16 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -103,9 +103,6 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 	return NF_ACCEPT;
 }
 
-int nf_nat_module_is_loaded = 0;
-EXPORT_SYMBOL_GPL(nf_nat_module_is_loaded);
-
 static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
 {
 	return NF_CT_F_BASIC;
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 51a2708..30eeaa4 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -350,7 +350,6 @@ static int __init nf_nat_standalone_init(void)
 		printk("nf_nat_init: can't register hooks.\n");
 		goto cleanup_rule_init;
 	}
-	nf_nat_module_is_loaded = 1;
 	return ret;
 
  cleanup_rule_init:
@@ -367,7 +366,6 @@ static void __exit nf_nat_standalone_fini(void)
 {
 	nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
 	nf_nat_rule_cleanup();
-	nf_nat_module_is_loaded = 0;
 #ifdef CONFIG_XFRM
 	ip_nat_decode_session = NULL;
 	synchronize_net();
-- 
cgit v0.10.2


From dacd2a1a5cf621288833aa3c6e815b86a1536538 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:25:51 -0700
Subject: [NETFILTER]: nf_conntrack: remove old memory allocator of conntrack

Memory space for the helper and NAT areas is now allocated by the
extension infrastructure, so the old feature-based allocator of
conntrack is no longer needed.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index b2083d3..71386e5 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -117,9 +117,6 @@ struct nf_conn
 	/* Unique ID that identifies this conntrack*/
 	unsigned int id;
 
-	/* features - nat, helper, ... used by allocating system */
-	u_int32_t features;
-
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	u_int32_t mark;
 #endif
@@ -133,9 +130,6 @@ struct nf_conn
 
 	/* Extensions */
 	struct nf_ct_ext *ext;
-
-	/* features dynamically at the end: helper, nat (both optional) */
-	char data[0];
 };
 
 static inline struct nf_conn *
@@ -265,14 +259,6 @@ do {							\
 	local_bh_enable();				\
 } while (0)
 
-/* no helper, no nat */
-#define	NF_CT_F_BASIC	0
-/* for helper */
-#define	NF_CT_F_HELP	1
-/* for nat. */
-#define	NF_CT_F_NAT	2
-#define NF_CT_F_NUM	4
-
 extern int
 nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size);
 extern void
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 96a58d8..890752d 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -64,8 +64,6 @@ struct nf_conntrack_l3proto
 	int (*prepare)(struct sk_buff **pskb, unsigned int hooknum,
 		       unsigned int *dataoff, u_int8_t *protonum);
 
-	u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple);
-
 	int (*tuple_to_nfattr)(struct sk_buff *skb,
 			       const struct nf_conntrack_tuple *t);
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7411dd16..129a8cc 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -103,11 +103,6 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 	return NF_ACCEPT;
 }
 
-static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
-{
-	return NF_CT_F_BASIC;
-}
-
 static unsigned int ipv4_confirm(unsigned int hooknum,
 				 struct sk_buff **pskb,
 				 const struct net_device *in,
@@ -419,7 +414,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.print_tuple	 = ipv4_print_tuple,
 	.print_conntrack = ipv4_print_conntrack,
 	.prepare	 = ipv4_prepare,
-	.get_features	 = ipv4_get_features,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 	.tuple_to_nfattr = ipv4_tuple_to_nfattr,
 	.nfattr_to_tuple = ipv4_nfattr_to_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 1b1797f..747b01e 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -147,11 +147,6 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 	return NF_ACCEPT;
 }
 
-static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple)
-{
-	return NF_CT_F_BASIC;
-}
-
 static unsigned int ipv6_confirm(unsigned int hooknum,
 				 struct sk_buff **pskb,
 				 const struct net_device *in,
@@ -397,7 +392,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.ctl_table_path		= nf_net_netfilter_sysctl_path,
 	.ctl_table		= nf_ct_ipv6_sysctl_table,
 #endif
-	.get_features		= ipv6_get_features,
 	.me			= THIS_MODULE,
 };
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 914506e..a713666 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -71,39 +71,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 unsigned int nf_ct_log_invalid __read_mostly;
 LIST_HEAD(unconfirmed);
 static int nf_conntrack_vmalloc __read_mostly;
-
+static struct kmem_cache *nf_conntrack_cachep __read_mostly;
 static unsigned int nf_conntrack_next_id;
 
 DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
 
-/*
- * This scheme offers various size of "struct nf_conn" dependent on
- * features(helper, nat, ...)
- */
-
-#define NF_CT_FEATURES_NAMELEN	256
-static struct {
-	/* name of slab cache. printed in /proc/slabinfo */
-	char *name;
-
-	/* size of slab cache */
-	size_t size;
-
-	/* slab cache pointer */
-	struct kmem_cache *cachep;
-
-	/* allocated slab cache + modules which uses this slab cache */
-	int use;
-
-} nf_ct_cache[NF_CT_F_NUM];
-
-/* protect members of nf_ct_cache except of "use" */
-DEFINE_RWLOCK(nf_ct_cache_lock);
-
-/* This avoids calling kmem_cache_create() with same name simultaneously */
-static DEFINE_MUTEX(nf_ct_cache_mutex);
-
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
@@ -126,122 +99,6 @@ static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
 				nf_conntrack_hash_rnd);
 }
 
-int nf_conntrack_register_cache(u_int32_t features, const char *name,
-				size_t size)
-{
-	int ret = 0;
-	char *cache_name;
-	struct kmem_cache *cachep;
-
-	DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
-	       features, name, size);
-
-	if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
-		DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n",
-			features);
-		return -EINVAL;
-	}
-
-	mutex_lock(&nf_ct_cache_mutex);
-
-	write_lock_bh(&nf_ct_cache_lock);
-	/* e.g: multiple helpers are loaded */
-	if (nf_ct_cache[features].use > 0) {
-		DEBUGP("nf_conntrack_register_cache: already resisterd.\n");
-		if ((!strncmp(nf_ct_cache[features].name, name,
-			      NF_CT_FEATURES_NAMELEN))
-		    && nf_ct_cache[features].size == size) {
-			DEBUGP("nf_conntrack_register_cache: reusing.\n");
-			nf_ct_cache[features].use++;
-			ret = 0;
-		} else
-			ret = -EBUSY;
-
-		write_unlock_bh(&nf_ct_cache_lock);
-		mutex_unlock(&nf_ct_cache_mutex);
-		return ret;
-	}
-	write_unlock_bh(&nf_ct_cache_lock);
-
-	/*
-	 * The memory space for name of slab cache must be alive until
-	 * cache is destroyed.
-	 */
-	cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
-	if (cache_name == NULL) {
-		DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
-		ret = -ENOMEM;
-		goto out_up_mutex;
-	}
-
-	if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
-						>= NF_CT_FEATURES_NAMELEN) {
-		printk("nf_conntrack_register_cache: name too long\n");
-		ret = -EINVAL;
-		goto out_free_name;
-	}
-
-	cachep = kmem_cache_create(cache_name, size, 0, 0,
-				   NULL, NULL);
-	if (!cachep) {
-		printk("nf_conntrack_register_cache: Can't create slab cache "
-		       "for the features = 0x%x\n", features);
-		ret = -ENOMEM;
-		goto out_free_name;
-	}
-
-	write_lock_bh(&nf_ct_cache_lock);
-	nf_ct_cache[features].use = 1;
-	nf_ct_cache[features].size = size;
-	nf_ct_cache[features].cachep = cachep;
-	nf_ct_cache[features].name = cache_name;
-	write_unlock_bh(&nf_ct_cache_lock);
-
-	goto out_up_mutex;
-
-out_free_name:
-	kfree(cache_name);
-out_up_mutex:
-	mutex_unlock(&nf_ct_cache_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_conntrack_register_cache);
-
-/* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */
-void nf_conntrack_unregister_cache(u_int32_t features)
-{
-	struct kmem_cache *cachep;
-	char *name;
-
-	/*
-	 * This assures that kmem_cache_create() isn't called before destroying
-	 * slab cache.
-	 */
-	DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
-	mutex_lock(&nf_ct_cache_mutex);
-
-	write_lock_bh(&nf_ct_cache_lock);
-	if (--nf_ct_cache[features].use > 0) {
-		write_unlock_bh(&nf_ct_cache_lock);
-		mutex_unlock(&nf_ct_cache_mutex);
-		return;
-	}
-	cachep = nf_ct_cache[features].cachep;
-	name = nf_ct_cache[features].name;
-	nf_ct_cache[features].cachep = NULL;
-	nf_ct_cache[features].name = NULL;
-	nf_ct_cache[features].size = 0;
-	write_unlock_bh(&nf_ct_cache_lock);
-
-	synchronize_net();
-
-	kmem_cache_destroy(cachep);
-	kfree(name);
-
-	mutex_unlock(&nf_ct_cache_mutex);
-}
-EXPORT_SYMBOL_GPL(nf_conntrack_unregister_cache);
-
 int
 nf_ct_get_tuple(const struct sk_buff *skb,
 		unsigned int nhoff,
@@ -559,11 +416,8 @@ static int early_drop(struct list_head *chain)
 	return dropped;
 }
 
-static struct nf_conn *
-__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
-		     const struct nf_conntrack_tuple *repl,
-		     const struct nf_conntrack_l3proto *l3proto,
-		     u_int32_t features)
+struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
+				   const struct nf_conntrack_tuple *repl)
 {
 	struct nf_conn *conntrack = NULL;
 
@@ -589,65 +443,28 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 		}
 	}
 
-	/*  find features needed by this conntrack. */
-	features |= l3proto->get_features(orig);
-
-	DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);
-
-	read_lock_bh(&nf_ct_cache_lock);
-
-	if (unlikely(!nf_ct_cache[features].use)) {
-		DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
-			features);
-		goto out;
-	}
-
-	conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
+	conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC);
 	if (conntrack == NULL) {
-		DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
-		goto out;
+		DEBUGP("nf_conntrack_alloc: Can't alloc conntrack.\n");
+		atomic_dec(&nf_conntrack_count);
+		return ERR_PTR(-ENOMEM);
 	}
 
-	memset(conntrack, 0, nf_ct_cache[features].size);
-	conntrack->features = features;
 	atomic_set(&conntrack->ct_general.use, 1);
 	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&conntrack->timeout, death_by_timeout,
 		    (unsigned long)conntrack);
-	read_unlock_bh(&nf_ct_cache_lock);
 
 	return conntrack;
-out:
-	read_unlock_bh(&nf_ct_cache_lock);
-	atomic_dec(&nf_conntrack_count);
-	return conntrack;
-}
-
-struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
-				   const struct nf_conntrack_tuple *repl)
-{
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conn *ct;
-
-	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(orig->src.l3num);
-	ct = __nf_conntrack_alloc(orig, repl, l3proto, 0);
-	rcu_read_unlock();
-
-	return ct;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 
 void nf_conntrack_free(struct nf_conn *conntrack)
 {
-	u_int32_t features = conntrack->features;
-	NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
 	nf_ct_ext_free(conntrack);
-	DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
-	       conntrack);
-	kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
+	kmem_cache_free(nf_conntrack_cachep, conntrack);
 	atomic_dec(&nf_conntrack_count);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
@@ -665,14 +482,13 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 	struct nf_conn_help *help;
 	struct nf_conntrack_tuple repl_tuple;
 	struct nf_conntrack_expect *exp;
-	u_int32_t features = 0;
 
 	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
 		DEBUGP("Can't invert tuple.\n");
 		return NULL;
 	}
 
-	conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto, features);
+	conntrack = nf_conntrack_alloc(tuple, &repl_tuple);
 	if (conntrack == NULL || IS_ERR(conntrack)) {
 		DEBUGP("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)conntrack;
@@ -1128,8 +944,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush);
    supposed to kill the mall. */
 void nf_conntrack_cleanup(void)
 {
-	int i;
-
 	rcu_assign_pointer(ip_ct_attach, NULL);
 
 	/* This makes sure all current packets have passed through
@@ -1150,14 +964,7 @@ void nf_conntrack_cleanup(void)
 
 	rcu_assign_pointer(nf_ct_destroy, NULL);
 
-	for (i = 0; i < NF_CT_F_NUM; i++) {
-		if (nf_ct_cache[i].use == 0)
-			continue;
-
-		NF_CT_ASSERT(nf_ct_cache[i].use == 1);
-		nf_ct_cache[i].use = 1;
-		nf_conntrack_unregister_cache(i);
-	}
+	kmem_cache_destroy(nf_conntrack_cachep);
 	kmem_cache_destroy(nf_conntrack_expect_cachep);
 	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
 			    nf_conntrack_htable_size);
@@ -1267,9 +1074,10 @@ int __init nf_conntrack_init(void)
 		goto err_out;
 	}
 
-	ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
-					  sizeof(struct nf_conn));
-	if (ret < 0) {
+	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
+						sizeof(struct nf_conn),
+						0, 0, NULL, NULL);
+	if (!nf_conntrack_cachep) {
 		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
 		goto err_free_hash;
 	}
@@ -1307,7 +1115,7 @@ out_fini_proto:
 out_free_expect_slab:
 	kmem_cache_destroy(nf_conntrack_expect_cachep);
 err_free_conntrack_slab:
-	nf_conntrack_unregister_cache(NF_CT_F_BASIC);
+	kmem_cache_destroy(nf_conntrack_cachep);
 err_free_hash:
 	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
 			    nf_conntrack_htable_size);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index cbd96f3..2fd0f11 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -76,12 +76,6 @@ generic_prepare(struct sk_buff **pskb, unsigned int hooknum,
 }
 
 
-static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple)
-
-{
-	return NF_CT_F_BASIC;
-}
-
 struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = {
 	.l3proto	 = PF_UNSPEC,
 	.name		 = "unknown",
@@ -90,6 +84,5 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = {
 	.print_tuple	 = generic_print_tuple,
 	.print_conntrack = generic_print_conntrack,
 	.prepare	 = generic_prepare,
-	.get_features	 = generic_get_features,
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
-- 
cgit v0.10.2


From d8a0509a696de60296a66ba4fe4f9eaade497103 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:26:16 -0700
Subject: [NETFILTER]: nf_nat: kill global 'destroy' operation

This kills the global 'destroy' operation which was used by NAT.
Instead it uses the extension infrastructure so that multiple
extensions can register their own operations.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 71386e5..ef4a403 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -213,9 +213,6 @@ extern void nf_conntrack_tcp_update(struct sk_buff *skb,
 				    struct nf_conn *conntrack,
 				    int dir);
 
-/* Call me when a conntrack is destroyed. */
-extern void (*nf_conntrack_destroyed)(struct nf_conn *conntrack);
-
 /* Fake conntrack entry for untracked connections */
 extern struct nf_conn nf_conntrack_untracked;
 
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 4ce82d7..e370d15 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -87,20 +87,6 @@ hash_by_src(const struct nf_conntrack_tuple *tuple)
 			    tuple->dst.protonum, 0) % nf_nat_htable_size;
 }
 
-/* Noone using conntrack by the time this called. */
-static void nf_nat_cleanup_conntrack(struct nf_conn *conn)
-{
-	struct nf_conn_nat *nat;
-	if (!(conn->status & IPS_NAT_DONE_MASK))
-		return;
-
-	nat = nfct_nat(conn);
-	write_lock_bh(&nf_nat_lock);
-	list_del(&nat->info.bysource);
-	nat->info.ct = NULL;
-	write_unlock_bh(&nf_nat_lock);
-}
-
 /* Is this tuple already taken? (not by us) */
 int
 nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
@@ -604,6 +590,22 @@ nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range)
 EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr);
 #endif
 
+/* Noone using conntrack by the time this called. */
+static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
+{
+	struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+
+	if (nat == NULL || nat->info.ct == NULL)
+		return;
+
+	NF_CT_ASSERT(nat->info.ct->status & IPS_NAT_DONE_MASK);
+
+	write_lock_bh(&nf_nat_lock);
+	list_del(&nat->info.bysource);
+	nat->info.ct = NULL;
+	write_unlock_bh(&nf_nat_lock);
+}
+
 static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
 {
 	struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT);
@@ -623,11 +625,12 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
 }
 
 struct nf_ct_ext_type nat_extend = {
-	.len	= sizeof(struct nf_conn_nat),
-	.align	= __alignof__(struct nf_conn_nat),
-	.move	= nf_nat_move_storage,
-	.id	= NF_CT_EXT_NAT,
-	.flags	= NF_CT_EXT_F_PREALLOC,
+	.len		= sizeof(struct nf_conn_nat),
+	.align		= __alignof__(struct nf_conn_nat),
+	.destroy	= nf_nat_cleanup_conntrack,
+	.move		= nf_nat_move_storage,
+	.id		= NF_CT_EXT_NAT,
+	.flags		= NF_CT_EXT_F_PREALLOC,
 };
 
 static int __init nf_nat_init(void)
@@ -664,10 +667,6 @@ static int __init nf_nat_init(void)
 		INIT_LIST_HEAD(&bysource[i]);
 	}
 
-	/* FIXME: Man, this is a hack.  <SIGH> */
-	NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed) == NULL);
-	rcu_assign_pointer(nf_conntrack_destroyed, nf_nat_cleanup_conntrack);
-
 	/* Initialize fake conntrack so that NAT will skip it */
 	nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
 
@@ -694,7 +693,6 @@ static int clean_nat(struct nf_conn *i, void *data)
 static void __exit nf_nat_cleanup(void)
 {
 	nf_ct_iterate_cleanup(&clean_nat, NULL);
-	rcu_assign_pointer(nf_conntrack_destroyed, NULL);
 	synchronize_rcu();
 	vfree(bysource);
 	nf_ct_l3proto_put(l3proto);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a713666..035eb9f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -53,9 +53,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 atomic_t nf_conntrack_count = ATOMIC_INIT(0);
 EXPORT_SYMBOL_GPL(nf_conntrack_count);
 
-void (*nf_conntrack_destroyed)(struct nf_conn *conntrack);
-EXPORT_SYMBOL_GPL(nf_conntrack_destroyed);
-
 unsigned int nf_conntrack_htable_size __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
@@ -157,7 +154,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
 	struct nf_conntrack_l4proto *l4proto;
-	typeof(nf_conntrack_destroyed) destroyed;
 
 	DEBUGP("destroy_conntrack(%p)\n", ct);
 	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
@@ -177,10 +173,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
 
 	nf_ct_ext_destroy(ct);
 
-	destroyed = rcu_dereference(nf_conntrack_destroyed);
-	if (destroyed)
-		destroyed(ct);
-
 	rcu_read_unlock();
 
 	write_lock_bh(&nf_conntrack_lock);
-- 
cgit v0.10.2


From b6b84d4a94e95727a4c65841eea23ac60c6aa329 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:26:35 -0700
Subject: [NETFILTER]: nf_nat: merge nf_conn and nf_nat_info

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 0541eed..d0e5e43 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -54,16 +54,6 @@ struct nf_nat_multi_range_compat
 #include <linux/netfilter/nf_conntrack_pptp.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
-struct nf_conn;
-
-/* The structure embedded in the conntrack structure. */
-struct nf_nat_info
-{
-	struct list_head bysource;
-	struct nf_nat_seq seq[IP_CT_DIR_MAX];
-	struct nf_conn *ct;
-};
-
 /* per conntrack: nat application helper private data */
 union nf_conntrack_nat_help
 {
@@ -71,9 +61,14 @@ union nf_conntrack_nat_help
 	struct nf_nat_pptp nat_pptp_info;
 };
 
+struct nf_conn;
+
+/* The structure embedded in the conntrack structure. */
 struct nf_conn_nat
 {
-	struct nf_nat_info info;
+	struct list_head bysource;
+	struct nf_nat_seq seq[IP_CT_DIR_MAX];
+	struct nf_conn *ct;
 	union nf_conntrack_nat_help help;
 #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
     defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index e370d15..7e31777 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -155,8 +155,8 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
 	struct nf_conn *ct;
 
 	read_lock_bh(&nf_nat_lock);
-	list_for_each_entry(nat, &bysource[h], info.bysource) {
-		ct = nat->info.ct;
+	list_for_each_entry(nat, &bysource[h], bysource) {
+		ct = nat->ct;
 		if (same_src(ct, tuple)) {
 			/* Copy source part from reply tuple. */
 			nf_ct_invert_tuplepr(result,
@@ -284,7 +284,6 @@ nf_nat_setup_info(struct nf_conn *ct,
 {
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 	struct nf_conn_nat *nat;
-	struct nf_nat_info *info;
 	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
 	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
 
@@ -335,9 +334,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 		srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		write_lock_bh(&nf_nat_lock);
 		/* nf_conntrack_alter_reply might re-allocate exntension aera */
-		info = &nfct_nat(ct)->info;
-		info->ct = ct;
-		list_add(&info->bysource, &bysource[srchash]);
+		nat = nfct_nat(ct);
+		nat->ct = ct;
+		list_add(&nat->bysource, &bysource[srchash]);
 		write_unlock_bh(&nf_nat_lock);
 	}
 
@@ -595,14 +594,14 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
 	struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
 
-	if (nat == NULL || nat->info.ct == NULL)
+	if (nat == NULL || nat->ct == NULL)
 		return;
 
-	NF_CT_ASSERT(nat->info.ct->status & IPS_NAT_DONE_MASK);
+	NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
 
 	write_lock_bh(&nf_nat_lock);
-	list_del(&nat->info.bysource);
-	nat->info.ct = NULL;
+	list_del(&nat->bysource);
+	nat->ct = NULL;
 	write_unlock_bh(&nf_nat_lock);
 }
 
@@ -610,7 +609,7 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
 {
 	struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT);
 	struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old;
-	struct nf_conn *ct = old_nat->info.ct;
+	struct nf_conn *ct = old_nat->ct;
 	unsigned int srchash;
 
 	if (!(ct->status & IPS_NAT_DONE_MASK))
@@ -619,8 +618,8 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
 	srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 
 	write_lock_bh(&nf_nat_lock);
-	list_replace(&old_nat->info.bysource, &new_nat->info.bysource);
-	new_nat->info.ct = ct;
+	list_replace(&old_nat->bysource, &new_nat->bysource);
+	new_nat->ct = ct;
 	write_unlock_bh(&nf_nat_lock);
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index ef0a99e..f3383fc 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -52,8 +52,8 @@ adjust_tcp_sequence(u32 seq,
 
 	dir = CTINFO2DIR(ctinfo);
 
-	this_way = &nat->info.seq[dir];
-	other_way = &nat->info.seq[!dir];
+	this_way = &nat->seq[dir];
+	other_way = &nat->seq[!dir];
 
 	DEBUGP("nf_nat_resize_packet: Seq_offset before: ");
 	DUMP_OFFSET(this_way);
@@ -372,8 +372,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
 			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
 			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
 				sack_adjust(*pskb, tcph, optoff+2,
-					    optoff+op[1],
-					    &nat->info.seq[!dir]);
+					    optoff+op[1], &nat->seq[!dir]);
 			optoff += op[1];
 		}
 	}
@@ -394,8 +393,8 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 
 	dir = CTINFO2DIR(ctinfo);
 
-	this_way = &nat->info.seq[dir];
-	other_way = &nat->info.seq[!dir];
+	this_way = &nat->seq[dir];
+	other_way = &nat->seq[!dir];
 
 	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
 		return 0;
-- 
cgit v0.10.2


From 61eb3107cd8e0302f95aae26206e552365daf290 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:27:06 -0700
Subject: [NETFILTER]: nf_conntrack_extend: use __read_mostly for struct
 nf_ct_ext_type

Also make them static.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 7e31777..04691ed 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -623,7 +623,7 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
 	write_unlock_bh(&nf_nat_lock);
 }
 
-struct nf_ct_ext_type nat_extend = {
+static struct nf_ct_ext_type nat_extend __read_mostly = {
 	.len		= sizeof(struct nf_conn_nat),
 	.align		= __alignof__(struct nf_conn_nat),
 	.destroy	= nf_nat_cleanup_conntrack,
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 6d32399..dc352f5 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -145,7 +145,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
 
-struct nf_ct_ext_type helper_extend = {
+static struct nf_ct_ext_type helper_extend __read_mostly = {
 	.len	= sizeof(struct nf_conn_help),
 	.align	= __alignof__(struct nf_conn_help),
 	.id	= NF_CT_EXT_HELPER,
-- 
cgit v0.10.2


From 8e5105a0c36a059dfd0f0bb9e73ee7c97d306247 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:27:33 -0700
Subject: [NETFILTER]: nf_conntrack: round up hashsize to next multiple of
 PAGE_SIZE

Don't let the rest of the page go to waste.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 035eb9f..54acac5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -965,12 +965,14 @@ void nf_conntrack_cleanup(void)
 	nf_conntrack_helper_fini();
 }
 
-static struct list_head *alloc_hashtable(int size, int *vmalloced)
+static struct list_head *alloc_hashtable(int *sizep, int *vmalloced)
 {
 	struct list_head *hash;
-	unsigned int i;
+	unsigned int size, i;
 
 	*vmalloced = 0;
+
+	size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct list_head));
 	hash = (void*)__get_free_pages(GFP_KERNEL,
 				       get_order(sizeof(struct list_head)
 						 * size));
@@ -1003,7 +1005,7 @@ int set_hashsize(const char *val, struct kernel_param *kp)
 	if (!hashsize)
 		return -EINVAL;
 
-	hash = alloc_hashtable(hashsize, &vmalloced);
+	hash = alloc_hashtable(&hashsize, &vmalloced);
 	if (!hash)
 		return -ENOMEM;
 
@@ -1053,19 +1055,19 @@ int __init nf_conntrack_init(void)
 		if (nf_conntrack_htable_size < 16)
 			nf_conntrack_htable_size = 16;
 	}
-	nf_conntrack_max = 8 * nf_conntrack_htable_size;
-
-	printk("nf_conntrack version %s (%u buckets, %d max)\n",
-	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
-	       nf_conntrack_max);
-
-	nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
+	nf_conntrack_hash = alloc_hashtable(&nf_conntrack_htable_size,
 					    &nf_conntrack_vmalloc);
 	if (!nf_conntrack_hash) {
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
 		goto err_out;
 	}
 
+	nf_conntrack_max = 8 * nf_conntrack_htable_size;
+
+	printk("nf_conntrack version %s (%u buckets, %d max)\n",
+	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
+	       nf_conntrack_max);
+
 	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
 						sizeof(struct nf_conn),
 						0, 0, NULL, NULL);
-- 
cgit v0.10.2


From f205c5e0c28aa7e0fb6eaaa66e97928f9d9e6994 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:28:14 -0700
Subject: [NETFILTER]: nf_conntrack: use hlists for conntrack hash

Convert conntrack hash to hlists to reduce its size and cache
footprint. Since the default hashsize to max. entries ratio
sucks (1:16), this patch doesn't reduce the amount of memory
used for the hash by default, but instead uses a better ratio
of 1:8, which results in the same max. entries value.

One thing worth noting is early_drop. It should approximate LRU, and
since an hlist cannot be traversed backwards it now has to iterate over
the entire chain to find the last (i.e. oldest) unassured entry. Since
chains shouldn't be very long and the entire operation is very rare,
this shouldn't be a problem.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3bf7d05..6351948 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -84,9 +84,9 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 	    struct nf_conntrack_l3proto *l3proto,
 	    struct nf_conntrack_l4proto *proto);
 
-extern struct list_head *nf_conntrack_hash;
+extern struct hlist_head *nf_conntrack_hash;
 extern struct list_head nf_conntrack_expect_list;
 extern rwlock_t nf_conntrack_lock ;
-extern struct list_head unconfirmed;
+extern struct hlist_head unconfirmed;
 
 #endif /* _NF_CONNTRACK_CORE_H */
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 5d72b16..d02ce87 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -125,8 +125,7 @@ DEBUGP("tuple %p: %u %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n",	    \
 /* Connections have two entries in the hash table: one for each way */
 struct nf_conntrack_tuple_hash
 {
-	struct list_head list;
-
+	struct hlist_node hnode;
 	struct nf_conntrack_tuple tuple;
 };
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 89f933e..888f27f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -41,35 +41,36 @@ struct ct_iter_state {
 	unsigned int bucket;
 };
 
-static struct list_head *ct_get_first(struct seq_file *seq)
+static struct hlist_node *ct_get_first(struct seq_file *seq)
 {
 	struct ct_iter_state *st = seq->private;
 
 	for (st->bucket = 0;
 	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
-		if (!list_empty(&nf_conntrack_hash[st->bucket]))
-			return nf_conntrack_hash[st->bucket].next;
+		if (!hlist_empty(&nf_conntrack_hash[st->bucket]))
+			return nf_conntrack_hash[st->bucket].first;
 	}
 	return NULL;
 }
 
-static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
+static struct hlist_node *ct_get_next(struct seq_file *seq,
+				      struct hlist_node *head)
 {
 	struct ct_iter_state *st = seq->private;
 
 	head = head->next;
-	while (head == &nf_conntrack_hash[st->bucket]) {
+	while (head == NULL) {
 		if (++st->bucket >= nf_conntrack_htable_size)
 			return NULL;
-		head = nf_conntrack_hash[st->bucket].next;
+		head = nf_conntrack_hash[st->bucket].first;
 	}
 	return head;
 }
 
-static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
+static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
 {
-	struct list_head *head = ct_get_first(seq);
+	struct hlist_node *head = ct_get_first(seq);
 
 	if (head)
 		while (pos && (head = ct_get_next(seq, head)))
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 54acac5..992d0ef 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -59,14 +59,14 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct list_head *nf_conntrack_hash __read_mostly;
+struct hlist_head *nf_conntrack_hash __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_hash);
 
 struct nf_conn nf_conntrack_untracked __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
 unsigned int nf_ct_log_invalid __read_mostly;
-LIST_HEAD(unconfirmed);
+HLIST_HEAD(unconfirmed);
 static int nf_conntrack_vmalloc __read_mostly;
 static struct kmem_cache *nf_conntrack_cachep __read_mostly;
 static unsigned int nf_conntrack_next_id;
@@ -142,8 +142,8 @@ static void
 clean_from_lists(struct nf_conn *ct)
 {
 	DEBUGP("clean_from_lists(%p)\n", ct);
-	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
+	hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
+	hlist_del(&ct->tuplehash[IP_CT_DIR_REPLY].hnode);
 
 	/* Destroy all pending expectations */
 	nf_ct_remove_expectations(ct);
@@ -184,8 +184,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
 
 	/* We overload first tuple to link into unconfirmed list. */
 	if (!nf_ct_is_confirmed(ct)) {
-		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+		BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode));
+		hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
 	}
 
 	NF_CT_STAT_INC(delete);
@@ -226,9 +226,10 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
 		    const struct nf_conn *ignored_conntrack)
 {
 	struct nf_conntrack_tuple_hash *h;
+	struct hlist_node *n;
 	unsigned int hash = hash_conntrack(tuple);
 
-	list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
+	hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) {
 		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
 		    nf_ct_tuple_equal(tuple, &h->tuple)) {
 			NF_CT_STAT_INC(found);
@@ -263,10 +264,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 				       unsigned int repl_hash)
 {
 	ct->id = ++nf_conntrack_next_id;
-	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
-		 &nf_conntrack_hash[hash]);
-	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
-		 &nf_conntrack_hash[repl_hash]);
+	hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
+		       &nf_conntrack_hash[hash]);
+	hlist_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnode,
+		       &nf_conntrack_hash[repl_hash]);
 }
 
 void nf_conntrack_hash_insert(struct nf_conn *ct)
@@ -290,6 +291,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
+	struct hlist_node *n;
 	enum ip_conntrack_info ctinfo;
 
 	ct = nf_ct_get(*pskb, &ctinfo);
@@ -319,17 +321,17 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	list_for_each_entry(h, &nf_conntrack_hash[hash], list)
+	hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 				      &h->tuple))
 			goto out;
-	list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list)
+	hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 				      &h->tuple))
 			goto out;
 
 	/* Remove from unconfirmed list */
-	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
 
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
 	/* Timer relative to confirmation time, not original
@@ -378,22 +380,22 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static int early_drop(struct list_head *chain)
+static int early_drop(struct hlist_head *chain)
 {
-	/* Traverse backwards: gives us oldest, which is roughly LRU */
+	/* Use oldest entry, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct = NULL, *tmp;
+	struct hlist_node *n;
 	int dropped = 0;
 
 	read_lock_bh(&nf_conntrack_lock);
-	list_for_each_entry_reverse(h, chain, list) {
+	hlist_for_each_entry(h, n, chain, hnode) {
 		tmp = nf_ct_tuplehash_to_ctrack(h);
-		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+		if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
 			ct = tmp;
-			atomic_inc(&ct->ct_general.use);
-			break;
-		}
 	}
+	if (ct)
+		atomic_inc(&ct->ct_general.use);
 	read_unlock_bh(&nf_conntrack_lock);
 
 	if (!ct)
@@ -535,7 +537,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 	}
 
 	/* Overload tuple linked list to put us in unconfirmed list. */
-	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
+	hlist_add_head(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
+		       &unconfirmed);
 
 	write_unlock_bh(&nf_conntrack_lock);
 
@@ -873,16 +876,17 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
+	struct hlist_node *n;
 
 	write_lock_bh(&nf_conntrack_lock);
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
-		list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) {
+		hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) {
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			if (iter(ct, data))
 				goto found;
 		}
 	}
-	list_for_each_entry(h, &unconfirmed, list) {
+	hlist_for_each_entry(h, n, &unconfirmed, hnode) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (iter(ct, data))
 			set_bit(IPS_DYING_BIT, &ct->status);
@@ -917,13 +921,14 @@ static int kill_all(struct nf_conn *i, void *data)
 	return 1;
 }
 
-static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
+static void free_conntrack_hash(struct hlist_head *hash, int vmalloced,
+				int size)
 {
 	if (vmalloced)
 		vfree(hash);
 	else
 		free_pages((unsigned long)hash,
-			   get_order(sizeof(struct list_head) * size));
+			   get_order(sizeof(struct hlist_head) * size));
 }
 
 void nf_conntrack_flush(void)
@@ -965,26 +970,26 @@ void nf_conntrack_cleanup(void)
 	nf_conntrack_helper_fini();
 }
 
-static struct list_head *alloc_hashtable(int *sizep, int *vmalloced)
+static struct hlist_head *alloc_hashtable(int *sizep, int *vmalloced)
 {
-	struct list_head *hash;
+	struct hlist_head *hash;
 	unsigned int size, i;
 
 	*vmalloced = 0;
 
-	size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct list_head));
+	size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head));
 	hash = (void*)__get_free_pages(GFP_KERNEL,
-				       get_order(sizeof(struct list_head)
+				       get_order(sizeof(struct hlist_head)
 						 * size));
 	if (!hash) {
 		*vmalloced = 1;
 		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
-		hash = vmalloc(sizeof(struct list_head) * size);
+		hash = vmalloc(sizeof(struct hlist_head) * size);
 	}
 
 	if (hash)
 		for (i = 0; i < size; i++)
-			INIT_LIST_HEAD(&hash[i]);
+			INIT_HLIST_HEAD(&hash[i]);
 
 	return hash;
 }
@@ -994,7 +999,7 @@ int set_hashsize(const char *val, struct kernel_param *kp)
 	int i, bucket, hashsize, vmalloced;
 	int old_vmalloced, old_size;
 	int rnd;
-	struct list_head *hash, *old_hash;
+	struct hlist_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
 
 	/* On boot, we can set this without any fancy locking. */
@@ -1015,12 +1020,12 @@ int set_hashsize(const char *val, struct kernel_param *kp)
 
 	write_lock_bh(&nf_conntrack_lock);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		while (!list_empty(&nf_conntrack_hash[i])) {
-			h = list_entry(nf_conntrack_hash[i].next,
-				       struct nf_conntrack_tuple_hash, list);
-			list_del(&h->list);
+		while (!hlist_empty(&nf_conntrack_hash[i])) {
+			h = hlist_entry(nf_conntrack_hash[i].first,
+					struct nf_conntrack_tuple_hash, hnode);
+			hlist_del(&h->hnode);
 			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
-			list_add_tail(&h->list, &hash[bucket]);
+			hlist_add_head(&h->hnode, &hash[bucket]);
 		}
 	}
 	old_size = nf_conntrack_htable_size;
@@ -1042,18 +1047,25 @@ module_param_call(hashsize, set_hashsize, param_get_uint,
 
 int __init nf_conntrack_init(void)
 {
+	int max_factor = 8;
 	int ret;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
-	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
+	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
 	if (!nf_conntrack_htable_size) {
 		nf_conntrack_htable_size
 			= (((num_physpages << PAGE_SHIFT) / 16384)
-			   / sizeof(struct list_head));
+			   / sizeof(struct hlist_head));
 		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
-			nf_conntrack_htable_size = 8192;
-		if (nf_conntrack_htable_size < 16)
-			nf_conntrack_htable_size = 16;
+			nf_conntrack_htable_size = 16384;
+		if (nf_conntrack_htable_size < 32)
+			nf_conntrack_htable_size = 32;
+
+		/* Use a max. factor of four by default to get the same max as
+		 * with the old struct list_heads. When a table size is given
+		 * we use the old value of 8 to avoid reducing the max.
+		 * entries. */
+		max_factor = 4;
 	}
 	nf_conntrack_hash = alloc_hashtable(&nf_conntrack_htable_size,
 					    &nf_conntrack_vmalloc);
@@ -1062,7 +1074,7 @@ int __init nf_conntrack_init(void)
 		goto err_out;
 	}
 
-	nf_conntrack_max = 8 * nf_conntrack_htable_size;
+	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
 	printk("nf_conntrack version %s (%u buckets, %d max)\n",
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index dc352f5..3fc6e9f 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -116,6 +116,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	unsigned int i;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp, *tmp;
+	struct hlist_node *n;
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&nf_conntrack_lock);
@@ -132,10 +133,10 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	}
 
 	/* Get rid of expecteds, set helpers to NULL. */
-	list_for_each_entry(h, &unconfirmed, list)
+	hlist_for_each_entry(h, n, &unconfirmed, hnode)
 		unhelp(h, me);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		list_for_each_entry(h, &nf_conntrack_hash[i], list)
+		hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode)
 			unhelp(h, me);
 	}
 	write_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3d56f36..0627559 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -428,7 +428,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nf_conn *ct, *last;
 	struct nf_conntrack_tuple_hash *h;
-	struct list_head *i;
+	struct hlist_node *n;
 	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
 	u_int8_t l3proto = nfmsg->nfgen_family;
 
@@ -436,8 +436,8 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conn *)cb->args[1];
 	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
-		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
-			h = (struct nf_conntrack_tuple_hash *) i;
+		hlist_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]],
+				     hnode) {
 			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
 			ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 45baeb0..fe536b2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -60,35 +60,36 @@ struct ct_iter_state {
 	unsigned int bucket;
 };
 
-static struct list_head *ct_get_first(struct seq_file *seq)
+static struct hlist_node *ct_get_first(struct seq_file *seq)
 {
 	struct ct_iter_state *st = seq->private;
 
 	for (st->bucket = 0;
 	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
-		if (!list_empty(&nf_conntrack_hash[st->bucket]))
-			return nf_conntrack_hash[st->bucket].next;
+		if (!hlist_empty(&nf_conntrack_hash[st->bucket]))
+			return nf_conntrack_hash[st->bucket].first;
 	}
 	return NULL;
 }
 
-static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
+static struct hlist_node *ct_get_next(struct seq_file *seq,
+				      struct hlist_node *head)
 {
 	struct ct_iter_state *st = seq->private;
 
 	head = head->next;
-	while (head == &nf_conntrack_hash[st->bucket]) {
+	while (head == NULL) {
 		if (++st->bucket >= nf_conntrack_htable_size)
 			return NULL;
-		head = nf_conntrack_hash[st->bucket].next;
+		head = nf_conntrack_hash[st->bucket].first;
 	}
 	return head;
 }
 
-static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
+static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
 {
-	struct list_head *head = ct_get_first(seq);
+	struct hlist_node *head = ct_get_first(seq);
 
 	if (head)
 		while (pos && (head = ct_get_next(seq, head)))
-- 
cgit v0.10.2


From 330f7db5e578e1e298ba3a41748e5ea333a64a2b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:28:42 -0700
Subject: [NETFILTER]: nf_conntrack: remove 'ignore_conntrack' argument from
 nf_conntrack_find_get

All callers pass NULL, and the argument doesn't seem very useful for modules either.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 6351948..2fa3a1b 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -58,8 +58,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 
 /* Find a connection corresponding to a tuple. */
 extern struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
-		      const struct nf_conn *ignored_conntrack);
+nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple);
 
 extern int __nf_conntrack_confirm(struct sk_buff **pskb);
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 129a8cc..a103f59 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -334,7 +334,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 		return -EINVAL;
 	}
 
-	h = nf_conntrack_find_get(&tuple, NULL);
+	h = nf_conntrack_find_get(&tuple);
 	if (h) {
 		struct sockaddr_in sin;
 		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index f4fc657..91fb277 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -187,13 +187,13 @@ icmp_error_message(struct sk_buff *skb,
 
 	*ctinfo = IP_CT_RELATED;
 
-	h = nf_conntrack_find_get(&innertuple, NULL);
+	h = nf_conntrack_find_get(&innertuple);
 	if (!h) {
 		/* Locally generated ICMPs will match inverted if they
 		   haven't been SNAT'ed yet */
 		/* FIXME: NAT code has to handle half-done double NAT --RR */
 		if (hooknum == NF_IP_LOCAL_OUT)
-			h = nf_conntrack_find_get(&origtuple, NULL);
+			h = nf_conntrack_find_get(&origtuple);
 
 		if (!h) {
 			DEBUGP("icmp_error_message: no match\n");
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 8814b95..a514661 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -193,7 +193,7 @@ icmpv6_error_message(struct sk_buff *skb,
 
 	*ctinfo = IP_CT_RELATED;
 
-	h = nf_conntrack_find_get(&intuple, NULL);
+	h = nf_conntrack_find_get(&intuple);
 	if (!h) {
 		DEBUGP("icmpv6_error: no match\n");
 		return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 992d0ef..8ed761c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -244,13 +244,12 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find);
 
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
-		      const struct nf_conn *ignored_conntrack)
+nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 
 	read_lock_bh(&nf_conntrack_lock);
-	h = __nf_conntrack_find(tuple, ignored_conntrack);
+	h = __nf_conntrack_find(tuple, NULL);
 	if (h)
 		atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
 	read_unlock_bh(&nf_conntrack_lock);
@@ -574,7 +573,7 @@ resolve_normal_ct(struct sk_buff *skb,
 	}
 
 	/* look for tuple match */
-	h = nf_conntrack_find_get(&tuple, NULL);
+	h = nf_conntrack_find_get(&tuple);
 	if (!h) {
 		h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff);
 		if (!h)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0627559..d310ec8 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -689,7 +689,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(&tuple, NULL);
+	h = nf_conntrack_find_get(&tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -744,7 +744,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(&tuple, NULL);
+	h = nf_conntrack_find_get(&tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1426,7 +1426,7 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3)
 		return err;
 
 	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(&master_tuple, NULL);
+	h = nf_conntrack_find_get(&master_tuple);
 	if (!h)
 		return -ENOENT;
 	ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 115bcb5..da36c48 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -146,7 +146,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 	DEBUGP("trying to timeout ct or exp for tuple ");
 	NF_CT_DUMP_TUPLE(t);
 
-	h = nf_conntrack_find_get(t, NULL);
+	h = nf_conntrack_find_get(t);
 	if (h)  {
 		sibling = nf_ct_tuplehash_to_ctrack(h);
 		DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
-- 
cgit v0.10.2


From ac565e5fc104fe1842a87f2206fcfb7b6dda903d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:30:08 -0700
Subject: [NETFILTER]: nf_conntrack: export hash allocation/destruction
 functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index ef4a403..8f2cbb9 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -172,6 +172,10 @@ static inline void nf_ct_put(struct nf_conn *ct)
 extern int nf_ct_l3proto_try_module_get(unsigned short l3proto);
 extern void nf_ct_l3proto_module_put(unsigned short l3proto);
 
+extern struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced);
+extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced,
+				 int size);
+
 extern struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
 		    const struct nf_conn *ignored_conntrack);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8ed761c..f4c3039 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -920,8 +920,7 @@ static int kill_all(struct nf_conn *i, void *data)
 	return 1;
 }
 
-static void free_conntrack_hash(struct hlist_head *hash, int vmalloced,
-				int size)
+void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, int size)
 {
 	if (vmalloced)
 		vfree(hash);
@@ -929,6 +928,7 @@ static void free_conntrack_hash(struct hlist_head *hash, int vmalloced,
 		free_pages((unsigned long)hash,
 			   get_order(sizeof(struct hlist_head) * size));
 }
+EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
 
 void nf_conntrack_flush(void)
 {
@@ -962,14 +962,14 @@ void nf_conntrack_cleanup(void)
 
 	kmem_cache_destroy(nf_conntrack_cachep);
 	kmem_cache_destroy(nf_conntrack_expect_cachep);
-	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
-			    nf_conntrack_htable_size);
+	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
+			     nf_conntrack_htable_size);
 
 	nf_conntrack_proto_fini();
 	nf_conntrack_helper_fini();
 }
 
-static struct hlist_head *alloc_hashtable(int *sizep, int *vmalloced)
+struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced)
 {
 	struct hlist_head *hash;
 	unsigned int size, i;
@@ -992,6 +992,7 @@ static struct hlist_head *alloc_hashtable(int *sizep, int *vmalloced)
 
 	return hash;
 }
+EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
 
 int set_hashsize(const char *val, struct kernel_param *kp)
 {
@@ -1009,7 +1010,7 @@ int set_hashsize(const char *val, struct kernel_param *kp)
 	if (!hashsize)
 		return -EINVAL;
 
-	hash = alloc_hashtable(&hashsize, &vmalloced);
+	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced);
 	if (!hash)
 		return -ENOMEM;
 
@@ -1037,7 +1038,7 @@ int set_hashsize(const char *val, struct kernel_param *kp)
 	nf_conntrack_hash_rnd = rnd;
 	write_unlock_bh(&nf_conntrack_lock);
 
-	free_conntrack_hash(old_hash, old_vmalloced, old_size);
+	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
 	return 0;
 }
 
@@ -1066,8 +1067,8 @@ int __init nf_conntrack_init(void)
 		 * entries. */
 		max_factor = 4;
 	}
-	nf_conntrack_hash = alloc_hashtable(&nf_conntrack_htable_size,
-					    &nf_conntrack_vmalloc);
+	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
+						  &nf_conntrack_vmalloc);
 	if (!nf_conntrack_hash) {
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
 		goto err_out;
@@ -1122,8 +1123,8 @@ out_free_expect_slab:
 err_free_conntrack_slab:
 	kmem_cache_destroy(nf_conntrack_cachep);
 err_free_hash:
-	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
-			    nf_conntrack_htable_size);
+	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
+			     nf_conntrack_htable_size);
 err_out:
 	return -ENOMEM;
 }
-- 
cgit v0.10.2


From 53aba5979e1d964c0234816eda2316f1c2e7946d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:30:27 -0700
Subject: [NETFILTER]: nf_nat: use hlists for bysource hash

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index d0e5e43..6ae52f7 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -66,7 +66,7 @@ struct nf_conn;
 /* The structure embedded in the conntrack structure. */
 struct nf_conn_nat
 {
-	struct list_head bysource;
+	struct hlist_node bysource;
 	struct nf_nat_seq seq[IP_CT_DIR_MAX];
 	struct nf_conn *ct;
 	union nf_conntrack_nat_help help;
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 04691ed..f242ac6 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -12,7 +12,6 @@
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/skbuff.h>
-#include <linux/vmalloc.h>
 #include <net/checksum.h>
 #include <net/icmp.h>
 #include <net/ip.h>
@@ -44,8 +43,9 @@ static struct nf_conntrack_l3proto *l3proto = NULL;
 
 /* Calculated at init based on memory size */
 static unsigned int nf_nat_htable_size;
+static int nf_nat_vmalloced;
 
-static struct list_head *bysource;
+static struct hlist_head *bysource;
 
 #define MAX_IP_NAT_PROTO 256
 static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
@@ -153,9 +153,10 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
 	unsigned int h = hash_by_src(tuple);
 	struct nf_conn_nat *nat;
 	struct nf_conn *ct;
+	struct hlist_node *n;
 
 	read_lock_bh(&nf_nat_lock);
-	list_for_each_entry(nat, &bysource[h], bysource) {
+	hlist_for_each_entry(nat, n, &bysource[h], bysource) {
 		ct = nat->ct;
 		if (same_src(ct, tuple)) {
 			/* Copy source part from reply tuple. */
@@ -336,7 +337,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 		/* nf_conntrack_alter_reply might re-allocate exntension aera */
 		nat = nfct_nat(ct);
 		nat->ct = ct;
-		list_add(&nat->bysource, &bysource[srchash]);
+		hlist_add_head(&nat->bysource, &bysource[srchash]);
 		write_unlock_bh(&nf_nat_lock);
 	}
 
@@ -600,7 +601,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 	NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
 
 	write_lock_bh(&nf_nat_lock);
-	list_del(&nat->bysource);
+	hlist_del(&nat->bysource);
 	nat->ct = NULL;
 	write_unlock_bh(&nf_nat_lock);
 }
@@ -618,7 +619,7 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
 	srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 
 	write_lock_bh(&nf_nat_lock);
-	list_replace(&old_nat->bysource, &new_nat->bysource);
+	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	new_nat->ct = ct;
 	write_unlock_bh(&nf_nat_lock);
 }
@@ -646,8 +647,8 @@ static int __init nf_nat_init(void)
 	/* Leave them the same for the moment. */
 	nf_nat_htable_size = nf_conntrack_htable_size;
 
-	/* One vmalloc for both hash tables */
-	bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size);
+	bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
+					 &nf_nat_vmalloced);
 	if (!bysource) {
 		ret = -ENOMEM;
 		goto cleanup_extend;
@@ -663,7 +664,7 @@ static int __init nf_nat_init(void)
 	write_unlock_bh(&nf_nat_lock);
 
 	for (i = 0; i < nf_nat_htable_size; i++) {
-		INIT_LIST_HEAD(&bysource[i]);
+		INIT_HLIST_HEAD(&bysource[i]);
 	}
 
 	/* Initialize fake conntrack so that NAT will skip it */
@@ -693,7 +694,7 @@ static void __exit nf_nat_cleanup(void)
 {
 	nf_ct_iterate_cleanup(&clean_nat, NULL);
 	synchronize_rcu();
-	vfree(bysource);
+	nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size);
 	nf_ct_l3proto_put(l3proto);
 	nf_ct_extend_unregister(&nat_extend);
 }
-- 
cgit v0.10.2


From 6823645d608541c2c69e8a99454936e058c294e0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:30:49 -0700
Subject: [NETFILTER]: nf_conntrack_expect: function naming unification

Currently there is a wild mix of nf_conntrack_expect_, nf_ct_exp_,
expect_, exp_, ...

Consistently use nf_ct_ as prefix for exported functions.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 2fa3a1b..a18f79c 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -84,7 +84,7 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 	    struct nf_conntrack_l4proto *proto);
 
 extern struct hlist_head *nf_conntrack_hash;
-extern struct list_head nf_conntrack_expect_list;
+extern struct list_head nf_ct_expect_list;
 extern rwlock_t nf_conntrack_lock ;
 extern struct hlist_head unconfirmed;
 
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 811c907..f0b9078 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -49,15 +49,15 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event,
 		atomic_notifier_call_chain(&nf_conntrack_chain, event, ct);
 }
 
-extern struct atomic_notifier_head nf_conntrack_expect_chain;
-extern int nf_conntrack_expect_register_notifier(struct notifier_block *nb);
-extern int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb);
+extern struct atomic_notifier_head nf_ct_expect_chain;
+extern int nf_ct_expect_register_notifier(struct notifier_block *nb);
+extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb);
 
 static inline void
-nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
-			  struct nf_conntrack_expect *exp)
+nf_ct_expect_event(enum ip_conntrack_expect_events event,
+		   struct nf_conntrack_expect *exp)
 {
-	atomic_notifier_call_chain(&nf_conntrack_expect_chain, event, exp);
+	atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp);
 }
 
 #else /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -67,9 +67,8 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
 static inline void nf_conntrack_event(enum ip_conntrack_events event,
 				      struct nf_conn *ct) {}
 static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
-static inline void
-nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
-			  struct nf_conntrack_expect *exp) {}
+static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
+				      struct nf_conntrack_expect *exp) {}
 static inline void nf_ct_event_cache_flush(void) {}
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
 
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 173c7c1..c0b1d1f 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -6,8 +6,8 @@
 #define _NF_CONNTRACK_EXPECT_H
 #include <net/netfilter/nf_conntrack.h>
 
-extern struct list_head nf_conntrack_expect_list;
-extern struct kmem_cache *nf_conntrack_expect_cachep;
+extern struct list_head nf_ct_expect_list;
+extern struct kmem_cache *nf_ct_expect_cachep;
 extern const struct file_operations exp_file_ops;
 
 struct nf_conntrack_expect
@@ -54,27 +54,27 @@ struct nf_conntrack_expect
 
 
 struct nf_conntrack_expect *
-__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple);
+__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple);
+nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-find_expectation(const struct nf_conntrack_tuple *tuple);
+nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple);
 
 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp);
 void nf_ct_remove_expectations(struct nf_conn *ct);
-void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp);
+void nf_ct_unexpect_related(struct nf_conntrack_expect *exp);
 
 /* Allocate space for an expectation: this is mandatory before calling
-   nf_conntrack_expect_related.  You will have to call put afterwards. */
-struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me);
-void nf_conntrack_expect_init(struct nf_conntrack_expect *, int,
-			      union nf_conntrack_address *,
-			      union nf_conntrack_address *,
-			      u_int8_t, __be16 *, __be16 *);
-void nf_conntrack_expect_put(struct nf_conntrack_expect *exp);
-int nf_conntrack_expect_related(struct nf_conntrack_expect *expect);
+   nf_ct_expect_related.  You will have to call put afterwards. */
+struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me);
+void nf_ct_expect_init(struct nf_conntrack_expect *, int,
+		       union nf_conntrack_address *,
+		       union nf_conntrack_address *,
+		       u_int8_t, __be16 *, __be16 *);
+void nf_ct_expect_put(struct nf_conntrack_expect *exp);
+int nf_ct_expect_related(struct nf_conntrack_expect *expect);
 
 #endif /*_NF_CONNTRACK_EXPECT_H*/
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 888f27f..12d6a63 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -209,7 +209,7 @@ static const struct file_operations ct_file_ops = {
 /* expects */
 static void *exp_seq_start(struct seq_file *s, loff_t *pos)
 {
-	struct list_head *e = &nf_conntrack_expect_list;
+	struct list_head *e = &nf_ct_expect_list;
 	loff_t i;
 
 	/* strange seq_file api calls stop even if we fail,
@@ -221,7 +221,7 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos)
 
 	for (i = 0; i <= *pos; i++) {
 		e = e->next;
-		if (e == &nf_conntrack_expect_list)
+		if (e == &nf_ct_expect_list)
 			return NULL;
 	}
 	return e;
@@ -234,7 +234,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
 	++*pos;
 	e = e->next;
 
-	if (e == &nf_conntrack_expect_list)
+	if (e == &nf_ct_expect_list)
 		return NULL;
 
 	return e;
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 0f17098..bd93a1d 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -45,7 +45,7 @@ static unsigned int help(struct sk_buff **pskb,
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
 		exp->tuple.dst.u.tcp.port = htons(port);
-		if (nf_conntrack_expect_related(exp) == 0)
+		if (nf_ct_expect_related(exp) == 0)
 			break;
 	}
 
@@ -57,7 +57,7 @@ static unsigned int help(struct sk_buff **pskb,
 				       matchoff, matchlen,
 				       buffer, strlen(buffer));
 	if (ret != NF_ACCEPT)
-		nf_conntrack_unexpect_related(exp);
+		nf_ct_unexpect_related(exp);
 	return ret;
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index e6bc8e5..cae4b46 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -131,7 +131,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb,
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
 		exp->tuple.dst.u.tcp.port = htons(port);
-		if (nf_conntrack_expect_related(exp) == 0)
+		if (nf_ct_expect_related(exp) == 0)
 			break;
 	}
 
@@ -139,7 +139,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb,
 		return NF_DROP;
 
 	if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) {
-		nf_conntrack_unexpect_related(exp);
+		nf_ct_unexpect_related(exp);
 		return NF_DROP;
 	}
 	return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index c5d2a2d..3d760dd 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -237,12 +237,12 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 	for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
 	     nated_port != 0; nated_port += 2) {
 		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
-		if (nf_conntrack_expect_related(rtp_exp) == 0) {
+		if (nf_ct_expect_related(rtp_exp) == 0) {
 			rtcp_exp->tuple.dst.u.udp.port =
 			    htons(nated_port + 1);
-			if (nf_conntrack_expect_related(rtcp_exp) == 0)
+			if (nf_ct_expect_related(rtcp_exp) == 0)
 				break;
-			nf_conntrack_unexpect_related(rtp_exp);
+			nf_ct_unexpect_related(rtp_exp);
 		}
 	}
 
@@ -261,8 +261,8 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 		info->rtp_port[i][dir] = rtp_port;
 		info->rtp_port[i][!dir] = htons(nated_port);
 	} else {
-		nf_conntrack_unexpect_related(rtp_exp);
-		nf_conntrack_unexpect_related(rtcp_exp);
+		nf_ct_unexpect_related(rtp_exp);
+		nf_ct_unexpect_related(rtcp_exp);
 		return -1;
 	}
 
@@ -299,7 +299,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
 	/* Try to get same port: if not, try to change it. */
 	for (; nated_port != 0; nated_port++) {
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_conntrack_expect_related(exp) == 0)
+		if (nf_ct_expect_related(exp) == 0)
 			break;
 	}
 
@@ -313,7 +313,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
 	if (set_h245_addr(pskb, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) < 0) {
-		nf_conntrack_unexpect_related(exp);
+		nf_ct_unexpect_related(exp);
 		return -1;
 	}
 
@@ -347,7 +347,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
 	/* Try to get same port: if not, try to change it. */
 	for (; nated_port != 0; nated_port++) {
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_conntrack_expect_related(exp) == 0)
+		if (nf_ct_expect_related(exp) == 0)
 			break;
 	}
 
@@ -365,7 +365,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
 		info->sig_port[dir] = port;
 		info->sig_port[!dir] = htons(nated_port);
 	} else {
-		nf_conntrack_unexpect_related(exp);
+		nf_ct_unexpect_related(exp);
 		return -1;
 	}
 
@@ -433,7 +433,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
 	/* Try to get same port: if not, try to change it. */
 	for (; nated_port != 0; nated_port++) {
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_conntrack_expect_related(exp) == 0)
+		if (nf_ct_expect_related(exp) == 0)
 			break;
 	}
 
@@ -460,7 +460,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
 				      info->sig_port[!dir]);
 		}
 	} else {
-		nf_conntrack_unexpect_related(exp);
+		nf_ct_unexpect_related(exp);
 		return -1;
 	}
 
@@ -517,7 +517,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
 	/* Try to get same port: if not, try to change it. */
 	for (nated_port = ntohs(port); nated_port != 0; nated_port++) {
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_conntrack_expect_related(exp) == 0)
+		if (nf_ct_expect_related(exp) == 0)
 			break;
 	}
 
@@ -531,7 +531,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
 	if (!set_h225_addr(pskb, data, dataoff, taddr,
 			   &ct->tuplehash[!dir].tuple.dst.u3,
 			   htons(nated_port)) == 0) {
-		nf_conntrack_unexpect_related(exp);
+		nf_ct_unexpect_related(exp);
 		return -1;
 	}
 
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index 9b8c0da..db7fbf6 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -55,7 +55,7 @@ static unsigned int help(struct sk_buff **pskb,
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
 		exp->tuple.dst.u.tcp.port = htons(port);
-		if (nf_conntrack_expect_related(exp) == 0)
+		if (nf_ct_expect_related(exp) == 0)
 			break;
 	}
 
@@ -71,7 +71,7 @@ static unsigned int help(struct sk_buff **pskb,
 				       matchoff, matchlen, buffer,
 				       strlen(buffer));
 	if (ret != NF_ACCEPT)
-		nf_conntrack_unexpect_related(exp);
+		nf_ct_unexpect_related(exp);
 	return ret;
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index a668887..deb80ae 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -81,10 +81,10 @@ static void pptp_nat_expected(struct nf_conn *ct,
 
 	DEBUGP("trying to unexpect other dir: ");
 	NF_CT_DUMP_TUPLE(&t);
-	other_exp = nf_conntrack_expect_find_get(&t);
+	other_exp = nf_ct_expect_find_get(&t);
 	if (other_exp) {
-		nf_conntrack_unexpect_related(other_exp);
-		nf_conntrack_expect_put(other_exp);
+		nf_ct_unexpect_related(other_exp);
+		nf_ct_expect_put(other_exp);
 		DEBUGP("success\n");
 	} else {
 		DEBUGP("not found!\n");
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index a32d746..940cdfc 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -278,7 +278,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) {
 		exp->tuple.dst.u.udp.port = htons(port);
-		if (nf_conntrack_expect_related(exp) == 0)
+		if (nf_ct_expect_related(exp) == 0)
 			break;
 	}
 
@@ -286,7 +286,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
 		return NF_DROP;
 
 	if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
-		nf_conntrack_unexpect_related(exp);
+		nf_ct_unexpect_related(exp);
 		return NF_DROP;
 	}
 	return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 2566b79..04dfeae 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -30,7 +30,7 @@ static unsigned int help(struct sk_buff **pskb,
 		= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
 	exp->dir = IP_CT_DIR_REPLY;
 	exp->expectfn = nf_nat_follow_master;
-	if (nf_conntrack_expect_related(exp) != 0)
+	if (nf_ct_expect_related(exp) != 0)
 		return NF_DROP;
 	return NF_ACCEPT;
 }
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 0568f2e..d21359e 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -142,23 +142,22 @@ static int amanda_help(struct sk_buff **pskb,
 		if (port == 0 || len > 5)
 			break;
 
-		exp = nf_conntrack_expect_alloc(ct);
+		exp = nf_ct_expect_alloc(ct);
 		if (exp == NULL) {
 			ret = NF_DROP;
 			goto out;
 		}
 		tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-		nf_conntrack_expect_init(exp, family,
-					 &tuple->src.u3, &tuple->dst.u3,
-					 IPPROTO_TCP, NULL, &port);
+		nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3,
+				  IPPROTO_TCP, NULL, &port);
 
 		nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
 		if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
 			ret = nf_nat_amanda(pskb, ctinfo, off - dataoff,
 					    len, exp);
-		else if (nf_conntrack_expect_related(exp) != 0)
+		else if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
-		nf_conntrack_expect_put(exp);
+		nf_ct_expect_put(exp);
 	}
 
 out:
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f4c3039..793f12f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -494,7 +494,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 	}
 
 	write_lock_bh(&nf_conntrack_lock);
-	exp = find_expectation(tuple);
+	exp = nf_ct_find_expectation(tuple);
 	if (exp) {
 		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
 			conntrack, exp);
@@ -544,7 +544,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 	if (exp) {
 		if (exp->expectfn)
 			exp->expectfn(conntrack, exp);
-		nf_conntrack_expect_put(exp);
+		nf_ct_expect_put(exp);
 	}
 
 	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
@@ -961,7 +961,7 @@ void nf_conntrack_cleanup(void)
 	rcu_assign_pointer(nf_ct_destroy, NULL);
 
 	kmem_cache_destroy(nf_conntrack_cachep);
-	kmem_cache_destroy(nf_conntrack_expect_cachep);
+	kmem_cache_destroy(nf_ct_expect_cachep);
 	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
 			     nf_conntrack_htable_size);
 
@@ -1088,10 +1088,10 @@ int __init nf_conntrack_init(void)
 		goto err_free_hash;
 	}
 
-	nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
 					sizeof(struct nf_conntrack_expect),
 					0, 0, NULL, NULL);
-	if (!nf_conntrack_expect_cachep) {
+	if (!nf_ct_expect_cachep) {
 		printk(KERN_ERR "Unable to create nf_expect slab cache\n");
 		goto err_free_conntrack_slab;
 	}
@@ -1119,7 +1119,7 @@ int __init nf_conntrack_init(void)
 out_fini_proto:
 	nf_conntrack_proto_fini();
 out_free_expect_slab:
-	kmem_cache_destroy(nf_conntrack_expect_cachep);
+	kmem_cache_destroy(nf_ct_expect_cachep);
 err_free_conntrack_slab:
 	kmem_cache_destroy(nf_conntrack_cachep);
 err_free_hash:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 6bd421d..83c41ac 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -26,8 +26,8 @@
 ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
 EXPORT_SYMBOL_GPL(nf_conntrack_chain);
 
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain);
+ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
+EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
 
 DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
 EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
@@ -103,14 +103,14 @@ int nf_conntrack_unregister_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 
-int nf_conntrack_expect_register_notifier(struct notifier_block *nb)
+int nf_ct_expect_register_notifier(struct notifier_block *nb)
 {
-	return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb);
+	return atomic_notifier_chain_register(&nf_ct_expect_chain, nb);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_register_notifier);
+EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
 
-int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
+int nf_ct_expect_unregister_notifier(struct notifier_block *nb)
 {
-	return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
+	return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_unregister_notifier);
+EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 504fb6c..4130ea6 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -26,11 +26,11 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
-LIST_HEAD(nf_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_list);
+LIST_HEAD(nf_ct_expect_list);
+EXPORT_SYMBOL_GPL(nf_ct_expect_list);
 
-struct kmem_cache *nf_conntrack_expect_cachep __read_mostly;
-static unsigned int nf_conntrack_expect_next_id;
+struct kmem_cache *nf_ct_expect_cachep __read_mostly;
+static unsigned int nf_ct_expect_next_id;
 
 /* nf_conntrack_expect helper functions */
 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
@@ -43,57 +43,57 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 	list_del(&exp->list);
 	NF_CT_STAT_INC(expect_delete);
 	master_help->expecting--;
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 }
 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
 
-static void expectation_timed_out(unsigned long ul_expect)
+static void nf_ct_expectation_timed_out(unsigned long ul_expect)
 {
 	struct nf_conntrack_expect *exp = (void *)ul_expect;
 
 	write_lock_bh(&nf_conntrack_lock);
 	nf_ct_unlink_expect(exp);
 	write_unlock_bh(&nf_conntrack_lock);
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 }
 
 struct nf_conntrack_expect *
-__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 
-	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+	list_for_each_entry(i, &nf_ct_expect_list, list) {
 		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
 			return i;
 	}
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find);
+EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
 
 /* Just find a expectation corresponding to a tuple. */
 struct nf_conntrack_expect *
-nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 
 	read_lock_bh(&nf_conntrack_lock);
-	i = __nf_conntrack_expect_find(tuple);
+	i = __nf_ct_expect_find(tuple);
 	if (i)
 		atomic_inc(&i->use);
 	read_unlock_bh(&nf_conntrack_lock);
 
 	return i;
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get);
+EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
 
 /* If an expectation for this connection is found, it gets delete from
  * global list then returned. */
 struct nf_conntrack_expect *
-find_expectation(const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *exp;
 
-	exp = __nf_conntrack_expect_find(tuple);
+	exp = __nf_ct_expect_find(tuple);
 	if (!exp)
 		return NULL;
 
@@ -126,10 +126,10 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
 	if (!help || help->expecting == 0)
 		return;
 
-	list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
+	list_for_each_entry_safe(i, tmp, &nf_ct_expect_list, list) {
 		if (i->master == ct && del_timer(&i->timeout)) {
 			nf_ct_unlink_expect(i);
-			nf_conntrack_expect_put(i);
+			nf_ct_expect_put(i);
 		}
 	}
 }
@@ -172,32 +172,32 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
 }
 
 /* Generally a bad idea to call this: could have matched already. */
-void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
+void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
 {
 	struct nf_conntrack_expect *i;
 
 	write_lock_bh(&nf_conntrack_lock);
 	/* choose the oldest expectation to evict */
-	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
+	list_for_each_entry_reverse(i, &nf_ct_expect_list, list) {
 		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
 			nf_ct_unlink_expect(i);
 			write_unlock_bh(&nf_conntrack_lock);
-			nf_conntrack_expect_put(i);
+			nf_ct_expect_put(i);
 			return;
 		}
 	}
 	write_unlock_bh(&nf_conntrack_lock);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related);
+EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
 
 /* We don't increase the master conntrack refcount for non-fulfilled
  * conntracks. During the conntrack destruction, the expectations are
  * always killed before the conntrack itself */
-struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
+struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
 {
 	struct nf_conntrack_expect *new;
 
-	new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
+	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
 	if (!new)
 		return NULL;
 
@@ -205,12 +205,12 @@ struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
 	atomic_set(&new->use, 1);
 	return new;
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc);
+EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
 
-void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
-			      union nf_conntrack_address *saddr,
-			      union nf_conntrack_address *daddr,
-			      u_int8_t proto, __be16 *src, __be16 *dst)
+void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
+		       union nf_conntrack_address *saddr,
+		       union nf_conntrack_address *daddr,
+		       u_int8_t proto, __be16 *src, __be16 *dst)
 {
 	int len;
 
@@ -273,28 +273,29 @@ void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
 		exp->mask.dst.u.all = 0;
 	}
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_init);
+EXPORT_SYMBOL_GPL(nf_ct_expect_init);
 
-void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
+void nf_ct_expect_put(struct nf_conntrack_expect *exp)
 {
 	if (atomic_dec_and_test(&exp->use))
-		kmem_cache_free(nf_conntrack_expect_cachep, exp);
+		kmem_cache_free(nf_ct_expect_cachep, exp);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_put);
+EXPORT_SYMBOL_GPL(nf_ct_expect_put);
 
-static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
+static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
 
 	atomic_inc(&exp->use);
 	master_help->expecting++;
-	list_add(&exp->list, &nf_conntrack_expect_list);
+	list_add(&exp->list, &nf_ct_expect_list);
 
-	setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp);
+	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
+		    (unsigned long)exp);
 	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
 	add_timer(&exp->timeout);
 
-	exp->id = ++nf_conntrack_expect_next_id;
+	exp->id = ++nf_ct_expect_next_id;
 	atomic_inc(&exp->use);
 	NF_CT_STAT_INC(expect_create);
 }
@@ -304,11 +305,11 @@ static void evict_oldest_expect(struct nf_conn *master)
 {
 	struct nf_conntrack_expect *i;
 
-	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
+	list_for_each_entry_reverse(i, &nf_ct_expect_list, list) {
 		if (i->master == master) {
 			if (del_timer(&i->timeout)) {
 				nf_ct_unlink_expect(i);
-				nf_conntrack_expect_put(i);
+				nf_ct_expect_put(i);
 			}
 			break;
 		}
@@ -327,7 +328,7 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
 	return 1;
 }
 
-int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
+int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 {
 	struct nf_conntrack_expect *i;
 	struct nf_conn *master = expect->master;
@@ -341,7 +342,7 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
 		ret = -ESHUTDOWN;
 		goto out;
 	}
-	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+	list_for_each_entry(i, &nf_ct_expect_list, list) {
 		if (expect_matches(i, expect)) {
 			/* Refresh timer: if it's dying, ignore.. */
 			if (refresh_timer(i)) {
@@ -358,19 +359,19 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
 	    master_help->expecting >= master_help->helper->max_expected)
 		evict_oldest_expect(master);
 
-	nf_conntrack_expect_insert(expect);
-	nf_conntrack_expect_event(IPEXP_NEW, expect);
+	nf_ct_expect_insert(expect);
+	nf_ct_expect_event(IPEXP_NEW, expect);
 	ret = 0;
 out:
 	write_unlock_bh(&nf_conntrack_lock);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_related);
+EXPORT_SYMBOL_GPL(nf_ct_expect_related);
 
 #ifdef CONFIG_PROC_FS
 static void *exp_seq_start(struct seq_file *s, loff_t *pos)
 {
-	struct list_head *e = &nf_conntrack_expect_list;
+	struct list_head *e = &nf_ct_expect_list;
 	loff_t i;
 
 	/* strange seq_file api calls stop even if we fail,
@@ -382,7 +383,7 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos)
 
 	for (i = 0; i <= *pos; i++) {
 		e = e->next;
-		if (e == &nf_conntrack_expect_list)
+		if (e == &nf_ct_expect_list)
 			return NULL;
 	}
 	return e;
@@ -395,7 +396,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
 	++*pos;
 	e = e->next;
 
-	if (e == &nf_conntrack_expect_list)
+	if (e == &nf_ct_expect_list)
 		return NULL;
 
 	return e;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 82db2aa..5efe65d 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -445,7 +445,7 @@ static int help(struct sk_buff **pskb,
 	       (int)matchlen, fb_ptr + matchoff,
 	       matchlen, ntohl(th->seq) + matchoff);
 
-	exp = nf_conntrack_expect_alloc(ct);
+	exp = nf_ct_expect_alloc(ct);
 	if (exp == NULL) {
 		ret = NF_DROP;
 		goto out;
@@ -523,14 +523,14 @@ static int help(struct sk_buff **pskb,
 				 matchoff, matchlen, exp);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
-		if (nf_conntrack_expect_related(exp) != 0)
+		if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
 		else
 			ret = NF_ACCEPT;
 	}
 
 out_put_expect:
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 out_update_nl:
 	/* Now if this ends in \n, update ftp info.  Seq may have been
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a1b95ac..61ae90f 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -282,22 +282,22 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 	rtcp_port = htons(ntohs(port) + 1);
 
 	/* Create expect for RTP */
-	if ((rtp_exp = nf_conntrack_expect_alloc(ct)) == NULL)
+	if ((rtp_exp = nf_ct_expect_alloc(ct)) == NULL)
 		return -1;
-	nf_conntrack_expect_init(rtp_exp, ct->tuplehash[!dir].tuple.src.l3num,
-				 &ct->tuplehash[!dir].tuple.src.u3,
-				 &ct->tuplehash[!dir].tuple.dst.u3,
-				 IPPROTO_UDP, NULL, &rtp_port);
+	nf_ct_expect_init(rtp_exp, ct->tuplehash[!dir].tuple.src.l3num,
+			  &ct->tuplehash[!dir].tuple.src.u3,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  IPPROTO_UDP, NULL, &rtp_port);
 
 	/* Create expect for RTCP */
-	if ((rtcp_exp = nf_conntrack_expect_alloc(ct)) == NULL) {
-		nf_conntrack_expect_put(rtp_exp);
+	if ((rtcp_exp = nf_ct_expect_alloc(ct)) == NULL) {
+		nf_ct_expect_put(rtp_exp);
 		return -1;
 	}
-	nf_conntrack_expect_init(rtcp_exp, ct->tuplehash[!dir].tuple.src.l3num,
-				 &ct->tuplehash[!dir].tuple.src.u3,
-				 &ct->tuplehash[!dir].tuple.dst.u3,
-				 IPPROTO_UDP, NULL, &rtcp_port);
+	nf_ct_expect_init(rtcp_exp, ct->tuplehash[!dir].tuple.src.l3num,
+			  &ct->tuplehash[!dir].tuple.src.u3,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  IPPROTO_UDP, NULL, &rtcp_port);
 
 	if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
 		   &ct->tuplehash[!dir].tuple.dst.u3,
@@ -308,22 +308,22 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 		ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
 				   taddr, port, rtp_port, rtp_exp, rtcp_exp);
 	} else {		/* Conntrack only */
-		if (nf_conntrack_expect_related(rtp_exp) == 0) {
-			if (nf_conntrack_expect_related(rtcp_exp) == 0) {
+		if (nf_ct_expect_related(rtp_exp) == 0) {
+			if (nf_ct_expect_related(rtcp_exp) == 0) {
 				DEBUGP("nf_ct_h323: expect RTP ");
 				NF_CT_DUMP_TUPLE(&rtp_exp->tuple);
 				DEBUGP("nf_ct_h323: expect RTCP ");
 				NF_CT_DUMP_TUPLE(&rtcp_exp->tuple);
 			} else {
-				nf_conntrack_unexpect_related(rtp_exp);
+				nf_ct_unexpect_related(rtp_exp);
 				ret = -1;
 			}
 		} else
 			ret = -1;
 	}
 
-	nf_conntrack_expect_put(rtp_exp);
-	nf_conntrack_expect_put(rtcp_exp);
+	nf_ct_expect_put(rtp_exp);
+	nf_ct_expect_put(rtcp_exp);
 
 	return ret;
 }
@@ -349,12 +349,12 @@ static int expect_t120(struct sk_buff **pskb,
 		return 0;
 
 	/* Create expect for T.120 connections */
-	if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+	if ((exp = nf_ct_expect_alloc(ct)) == NULL)
 		return -1;
-	nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
-				 &ct->tuplehash[!dir].tuple.src.u3,
-				 &ct->tuplehash[!dir].tuple.dst.u3,
-				 IPPROTO_TCP, NULL, &port);
+	nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+			  &ct->tuplehash[!dir].tuple.src.u3,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  IPPROTO_TCP, NULL, &port);
 	exp->flags = NF_CT_EXPECT_PERMANENT;	/* Accept multiple channels */
 
 	if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -366,14 +366,14 @@ static int expect_t120(struct sk_buff **pskb,
 		ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
-		if (nf_conntrack_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp) == 0) {
 			DEBUGP("nf_ct_h323: expect T.120 ");
 			NF_CT_DUMP_TUPLE(&exp->tuple);
 		} else
 			ret = -1;
 	}
 
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 	return ret;
 }
@@ -684,12 +684,12 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
 		return 0;
 
 	/* Create expect for h245 connection */
-	if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+	if ((exp = nf_ct_expect_alloc(ct)) == NULL)
 		return -1;
-	nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
-				 &ct->tuplehash[!dir].tuple.src.u3,
-				 &ct->tuplehash[!dir].tuple.dst.u3,
-				 IPPROTO_TCP, NULL, &port);
+	nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+			  &ct->tuplehash[!dir].tuple.src.u3,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  IPPROTO_TCP, NULL, &port);
 	exp->helper = &nf_conntrack_helper_h245;
 
 	if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -701,14 +701,14 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
 		ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
-		if (nf_conntrack_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp) == 0) {
 			DEBUGP("nf_ct_q931: expect H.245 ");
 			NF_CT_DUMP_TUPLE(&exp->tuple);
 		} else
 			ret = -1;
 	}
 
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 	return ret;
 }
@@ -796,11 +796,11 @@ static int expect_callforwarding(struct sk_buff **pskb,
 	}
 
 	/* Create expect for the second call leg */
-	if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+	if ((exp = nf_ct_expect_alloc(ct)) == NULL)
 		return -1;
-	nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
-				 &ct->tuplehash[!dir].tuple.src.u3, &addr,
-				 IPPROTO_TCP, NULL, &port);
+	nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+			  &ct->tuplehash[!dir].tuple.src.u3, &addr,
+			  IPPROTO_TCP, NULL, &port);
 	exp->helper = nf_conntrack_helper_q931;
 
 	if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -812,14 +812,14 @@ static int expect_callforwarding(struct sk_buff **pskb,
 		ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
 					 taddr, port, exp);
 	} else {		/* Conntrack only */
-		if (nf_conntrack_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp) == 0) {
 			DEBUGP("nf_ct_q931: expect Call Forwarding ");
 			NF_CT_DUMP_TUPLE(&exp->tuple);
 		} else
 			ret = -1;
 	}
 
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 	return ret;
 }
@@ -1225,7 +1225,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 	tuple.dst.u.tcp.port = port;
 	tuple.dst.protonum = IPPROTO_TCP;
 
-	exp = __nf_conntrack_expect_find(&tuple);
+	exp = __nf_ct_expect_find(&tuple);
 	if (exp && exp->master == ct)
 		return exp;
 	return NULL;
@@ -1271,14 +1271,13 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
 		return 0;
 
 	/* Create expect for Q.931 */
-	if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+	if ((exp = nf_ct_expect_alloc(ct)) == NULL)
 		return -1;
-	nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
-				 gkrouted_only ? /* only accept calls from GK? */
-					&ct->tuplehash[!dir].tuple.src.u3 :
-					NULL,
-				 &ct->tuplehash[!dir].tuple.dst.u3,
-				 IPPROTO_TCP, NULL, &port);
+	nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+			  gkrouted_only ? /* only accept calls from GK? */
+				&ct->tuplehash[!dir].tuple.src.u3 : NULL,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  IPPROTO_TCP, NULL, &port);
 	exp->helper = nf_conntrack_helper_q931;
 	exp->flags = NF_CT_EXPECT_PERMANENT;	/* Accept multiple calls */
 
@@ -1286,7 +1285,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
 	if (nat_q931 && ct->status & IPS_NAT_MASK) {	/* Need NAT */
 		ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp);
 	} else {		/* Conntrack only */
-		if (nf_conntrack_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp) == 0) {
 			DEBUGP("nf_ct_ras: expect Q.931 ");
 			NF_CT_DUMP_TUPLE(&exp->tuple);
 
@@ -1296,7 +1295,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
 			ret = -1;
 	}
 
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 	return ret;
 }
@@ -1343,20 +1342,20 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
 		return 0;
 
 	/* Need new expect */
-	if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+	if ((exp = nf_ct_expect_alloc(ct)) == NULL)
 		return -1;
-	nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
-				 &ct->tuplehash[!dir].tuple.src.u3, &addr,
-				 IPPROTO_UDP, NULL, &port);
+	nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+			  &ct->tuplehash[!dir].tuple.src.u3, &addr,
+			  IPPROTO_UDP, NULL, &port);
 	exp->helper = nf_conntrack_helper_ras;
 
-	if (nf_conntrack_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp) == 0) {
 		DEBUGP("nf_ct_ras: expect RAS ");
 		NF_CT_DUMP_TUPLE(&exp->tuple);
 	} else
 		ret = -1;
 
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 	return ret;
 }
@@ -1548,21 +1547,21 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	/* Need new expect */
-	if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+	if ((exp = nf_ct_expect_alloc(ct)) == NULL)
 		return -1;
-	nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
-				 &ct->tuplehash[!dir].tuple.src.u3, &addr,
-				 IPPROTO_TCP, NULL, &port);
+	nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+			  &ct->tuplehash[!dir].tuple.src.u3, &addr,
+			  IPPROTO_TCP, NULL, &port);
 	exp->flags = NF_CT_EXPECT_PERMANENT;
 	exp->helper = nf_conntrack_helper_q931;
 
-	if (nf_conntrack_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp) == 0) {
 		DEBUGP("nf_ct_ras: expect Q.931 ");
 		NF_CT_DUMP_TUPLE(&exp->tuple);
 	} else
 		ret = -1;
 
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 	return ret;
 }
@@ -1601,21 +1600,21 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
 		return 0;
 
 	/* Need new expect for call signal */
-	if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+	if ((exp = nf_ct_expect_alloc(ct)) == NULL)
 		return -1;
-	nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
-				 &ct->tuplehash[!dir].tuple.src.u3, &addr,
-				 IPPROTO_TCP, NULL, &port);
+	nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+			  &ct->tuplehash[!dir].tuple.src.u3, &addr,
+			  IPPROTO_TCP, NULL, &port);
 	exp->flags = NF_CT_EXPECT_PERMANENT;
 	exp->helper = nf_conntrack_helper_q931;
 
-	if (nf_conntrack_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp) == 0) {
 		DEBUGP("nf_ct_ras: expect Q.931 ");
 		NF_CT_DUMP_TUPLE(&exp->tuple);
 	} else
 		ret = -1;
 
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 	/* Ignore rasAddress */
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3fc6e9f..89a5f73 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -123,12 +123,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	list_del(&me->list);
 
 	/* Get rid of expectations */
-	list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
+	list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) {
 		struct nf_conn_help *help = nfct_help(exp->master);
 		if ((help->helper == me || exp->helper == me) &&
 		    del_timer(&exp->timeout)) {
 			nf_ct_unlink_expect(exp);
-			nf_conntrack_expect_put(exp);
+			nf_ct_expect_put(exp);
 		}
 	}
 
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 43ccd0e..79da93e 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -184,16 +184,16 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 				continue;
 			}
 
-			exp = nf_conntrack_expect_alloc(ct);
+			exp = nf_ct_expect_alloc(ct);
 			if (exp == NULL) {
 				ret = NF_DROP;
 				goto out;
 			}
 			tuple = &ct->tuplehash[!dir].tuple;
 			port = htons(dcc_port);
-			nf_conntrack_expect_init(exp, tuple->src.l3num,
-						 NULL, &tuple->dst.u3,
-						 IPPROTO_TCP, NULL, &port);
+			nf_ct_expect_init(exp, tuple->src.l3num,
+					  NULL, &tuple->dst.u3,
+					  IPPROTO_TCP, NULL, &port);
 
 			nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
 			if (nf_nat_irc && ct->status & IPS_NAT_MASK)
@@ -201,9 +201,9 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 						 addr_beg_p - ib_ptr,
 						 addr_end_p - addr_beg_p,
 						 exp);
-			else if (nf_conntrack_expect_related(exp) != 0)
+			else if (nf_ct_expect_related(exp) != 0)
 				ret = NF_DROP;
-			nf_conntrack_expect_put(exp);
+			nf_ct_expect_put(exp);
 			goto out;
 		}
 	}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 1093478..ea585c7 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -74,7 +74,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 	if (mask == 0)
 		goto out;
 
-	exp = nf_conntrack_expect_alloc(ct);
+	exp = nf_ct_expect_alloc(ct);
 	if (exp == NULL)
 		goto out;
 
@@ -91,8 +91,8 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 	exp->flags                = NF_CT_EXPECT_PERMANENT;
 	exp->helper               = NULL;
 
-	nf_conntrack_expect_related(exp);
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_related(exp);
+	nf_ct_expect_put(exp);
 
 	nf_ct_refresh(ct, *pskb, timeout * HZ);
 out:
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d310ec8..954cc58 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1239,7 +1239,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	u_int8_t l3proto = nfmsg->nfgen_family;
 
 	read_lock_bh(&nf_conntrack_lock);
-	list_for_each_prev(i, &nf_conntrack_expect_list) {
+	list_for_each_prev(i, &nf_ct_expect_list) {
 		exp = (struct nf_conntrack_expect *) i;
 		if (l3proto && exp->tuple.src.l3num != l3proto)
 			continue;
@@ -1291,14 +1291,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = nf_conntrack_expect_find_get(&tuple);
+	exp = nf_ct_expect_find_get(&tuple);
 	if (!exp)
 		return -ENOENT;
 
 	if (cda[CTA_EXPECT_ID-1]) {
 		__be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
 		if (exp->id != ntohl(id)) {
-			nf_conntrack_expect_put(exp);
+			nf_ct_expect_put(exp);
 			return -ENOENT;
 		}
 	}
@@ -1314,14 +1314,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err <= 0)
 		goto free;
 
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
 
 free:
 	kfree_skb(skb2);
 out:
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 	return err;
 }
 
@@ -1346,23 +1346,23 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 
 		/* bump usage count to 2 */
-		exp = nf_conntrack_expect_find_get(&tuple);
+		exp = nf_ct_expect_find_get(&tuple);
 		if (!exp)
 			return -ENOENT;
 
 		if (cda[CTA_EXPECT_ID-1]) {
 			__be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
 			if (exp->id != ntohl(id)) {
-				nf_conntrack_expect_put(exp);
+				nf_ct_expect_put(exp);
 				return -ENOENT;
 			}
 		}
 
 		/* after list removal, usage count == 1 */
-		nf_conntrack_unexpect_related(exp);
+		nf_ct_unexpect_related(exp);
 		/* have to put what we 'get' above.
 		 * after this line usage count == 0 */
-		nf_conntrack_expect_put(exp);
+		nf_ct_expect_put(exp);
 	} else if (cda[CTA_EXPECT_HELP_NAME-1]) {
 		char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
 
@@ -1373,24 +1373,22 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			write_unlock_bh(&nf_conntrack_lock);
 			return -EINVAL;
 		}
-		list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list,
-					 list) {
+		list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) {
 			struct nf_conn_help *m_help = nfct_help(exp->master);
 			if (m_help->helper == h
 			    && del_timer(&exp->timeout)) {
 				nf_ct_unlink_expect(exp);
-				nf_conntrack_expect_put(exp);
+				nf_ct_expect_put(exp);
 			}
 		}
 		write_unlock_bh(&nf_conntrack_lock);
 	} else {
 		/* This basically means we have to flush everything*/
 		write_lock_bh(&nf_conntrack_lock);
-		list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list,
-					 list) {
+		list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) {
 			if (del_timer(&exp->timeout)) {
 				nf_ct_unlink_expect(exp);
-				nf_conntrack_expect_put(exp);
+				nf_ct_expect_put(exp);
 			}
 		}
 		write_unlock_bh(&nf_conntrack_lock);
@@ -1438,7 +1436,7 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3)
 		goto out;
 	}
 
-	exp = nf_conntrack_expect_alloc(ct);
+	exp = nf_ct_expect_alloc(ct);
 	if (!exp) {
 		err = -ENOMEM;
 		goto out;
@@ -1451,8 +1449,8 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3)
 	memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
 	memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple));
 
-	err = nf_conntrack_expect_related(exp);
-	nf_conntrack_expect_put(exp);
+	err = nf_ct_expect_related(exp);
+	nf_ct_expect_put(exp);
 
 out:
 	nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
@@ -1482,7 +1480,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 		return err;
 
 	write_lock_bh(&nf_conntrack_lock);
-	exp = __nf_conntrack_expect_find(&tuple);
+	exp = __nf_ct_expect_find(&tuple);
 
 	if (!exp) {
 		write_unlock_bh(&nf_conntrack_lock);
@@ -1572,7 +1570,7 @@ static int __init ctnetlink_init(void)
 		goto err_unreg_exp_subsys;
 	}
 
-	ret = nf_conntrack_expect_register_notifier(&ctnl_notifier_exp);
+	ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp);
 	if (ret < 0) {
 		printk("ctnetlink_init: cannot expect register notifier.\n");
 		goto err_unreg_notifier;
@@ -1598,7 +1596,7 @@ static void __exit ctnetlink_exit(void)
 	printk("ctnetlink: unregistering from nfnetlink.\n");
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-	nf_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
+	nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
 	nf_conntrack_unregister_notifier(&ctnl_notifier);
 #endif
 
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index da36c48..916e106 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -124,12 +124,12 @@ static void pptp_expectfn(struct nf_conn *ct,
 		DEBUGP("trying to unexpect other dir: ");
 		NF_CT_DUMP_TUPLE(&inv_t);
 
-		exp_other = nf_conntrack_expect_find_get(&inv_t);
+		exp_other = nf_ct_expect_find_get(&inv_t);
 		if (exp_other) {
 			/* delete other expectation.  */
 			DEBUGP("found\n");
-			nf_conntrack_unexpect_related(exp_other);
-			nf_conntrack_expect_put(exp_other);
+			nf_ct_unexpect_related(exp_other);
+			nf_ct_expect_put(exp_other);
 		} else {
 			DEBUGP("not found\n");
 		}
@@ -157,11 +157,11 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 		nf_ct_put(sibling);
 		return 1;
 	} else {
-		exp = nf_conntrack_expect_find_get(t);
+		exp = nf_ct_expect_find_get(t);
 		if (exp) {
 			DEBUGP("unexpect_related of expect %p\n", exp);
-			nf_conntrack_unexpect_related(exp);
-			nf_conntrack_expect_put(exp);
+			nf_ct_unexpect_related(exp);
+			nf_ct_expect_put(exp);
 			return 1;
 		}
 	}
@@ -201,36 +201,36 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
 	int ret = 1;
 	typeof(nf_nat_pptp_hook_exp_gre) nf_nat_pptp_exp_gre;
 
-	exp_orig = nf_conntrack_expect_alloc(ct);
+	exp_orig = nf_ct_expect_alloc(ct);
 	if (exp_orig == NULL)
 		goto out;
 
-	exp_reply = nf_conntrack_expect_alloc(ct);
+	exp_reply = nf_ct_expect_alloc(ct);
 	if (exp_reply == NULL)
 		goto out_put_orig;
 
 	/* original direction, PNS->PAC */
 	dir = IP_CT_DIR_ORIGINAL;
-	nf_conntrack_expect_init(exp_orig, ct->tuplehash[dir].tuple.src.l3num,
-				 &ct->tuplehash[dir].tuple.src.u3,
-				 &ct->tuplehash[dir].tuple.dst.u3,
-				 IPPROTO_GRE, &peer_callid, &callid);
+	nf_ct_expect_init(exp_orig, ct->tuplehash[dir].tuple.src.l3num,
+			  &ct->tuplehash[dir].tuple.src.u3,
+			  &ct->tuplehash[dir].tuple.dst.u3,
+			  IPPROTO_GRE, &peer_callid, &callid);
 	exp_orig->expectfn = pptp_expectfn;
 
 	/* reply direction, PAC->PNS */
 	dir = IP_CT_DIR_REPLY;
-	nf_conntrack_expect_init(exp_reply, ct->tuplehash[dir].tuple.src.l3num,
-				 &ct->tuplehash[dir].tuple.src.u3,
-				 &ct->tuplehash[dir].tuple.dst.u3,
-				 IPPROTO_GRE, &callid, &peer_callid);
+	nf_ct_expect_init(exp_reply, ct->tuplehash[dir].tuple.src.l3num,
+			  &ct->tuplehash[dir].tuple.src.u3,
+			  &ct->tuplehash[dir].tuple.dst.u3,
+			  IPPROTO_GRE, &callid, &peer_callid);
 	exp_reply->expectfn = pptp_expectfn;
 
 	nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
 	if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
 		nf_nat_pptp_exp_gre(exp_orig, exp_reply);
-	if (nf_conntrack_expect_related(exp_orig) != 0)
+	if (nf_ct_expect_related(exp_orig) != 0)
 		goto out_put_both;
-	if (nf_conntrack_expect_related(exp_reply) != 0)
+	if (nf_ct_expect_related(exp_reply) != 0)
 		goto out_unexpect_orig;
 
 	/* Add GRE keymap entries */
@@ -243,16 +243,16 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
 	ret = 0;
 
 out_put_both:
-	nf_conntrack_expect_put(exp_reply);
+	nf_ct_expect_put(exp_reply);
 out_put_orig:
-	nf_conntrack_expect_put(exp_orig);
+	nf_ct_expect_put(exp_orig);
 out:
 	return ret;
 
 out_unexpect_both:
-	nf_conntrack_unexpect_related(exp_reply);
+	nf_ct_unexpect_related(exp_reply);
 out_unexpect_orig:
-	nf_conntrack_unexpect_related(exp_orig);
+	nf_ct_unexpect_related(exp_orig);
 	goto out_put_both;
 }
 
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index eb2d1dc..28ed303 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -141,27 +141,25 @@ static int help(struct sk_buff **pskb,
 	if (reply->zero != 0)
 		goto out;
 
-	exp = nf_conntrack_expect_alloc(ct);
+	exp = nf_ct_expect_alloc(ct);
 	if (exp == NULL) {
 		ret = NF_DROP;
 		goto out;
 	}
 
 	tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	nf_conntrack_expect_init(exp, family,
-				 &tuple->src.u3, &tuple->dst.u3,
-				 IPPROTO_TCP,
-				 NULL, &reply->port);
+	nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3,
+			  IPPROTO_TCP, NULL, &reply->port);
 
 	DEBUGP("nf_ct_sane: expect: ");
 	NF_CT_DUMP_TUPLE(&exp->tuple);
 	NF_CT_DUMP_TUPLE(&exp->mask);
 
 	/* Can't expect this?  Best to drop packet now. */
-	if (nf_conntrack_expect_related(exp) != 0)
+	if (nf_ct_expect_related(exp) != 0)
 		ret = NF_DROP;
 
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 out:
 	spin_unlock_bh(&nf_sane_lock);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1b5c6c1..1f17f80 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -378,23 +378,23 @@ static int set_expected_rtp(struct sk_buff **pskb,
 	int ret;
 	typeof(nf_nat_sdp_hook) nf_nat_sdp;
 
-	exp = nf_conntrack_expect_alloc(ct);
+	exp = nf_ct_expect_alloc(ct);
 	if (exp == NULL)
 		return NF_DROP;
-	nf_conntrack_expect_init(exp, family,
-				 &ct->tuplehash[!dir].tuple.src.u3, addr,
-				 IPPROTO_UDP, NULL, &port);
+	nf_ct_expect_init(exp, family,
+			  &ct->tuplehash[!dir].tuple.src.u3, addr,
+			  IPPROTO_UDP, NULL, &port);
 
 	nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook);
 	if (nf_nat_sdp && ct->status & IPS_NAT_MASK)
 		ret = nf_nat_sdp(pskb, ctinfo, exp, dptr);
 	else {
-		if (nf_conntrack_expect_related(exp) != 0)
+		if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
 		else
 			ret = NF_ACCEPT;
 	}
-	nf_conntrack_expect_put(exp);
+	nf_ct_expect_put(exp);
 
 	return ret;
 }
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index 37c4542..53d57b4 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -66,14 +66,12 @@ static int tftp_help(struct sk_buff **pskb,
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
-		exp = nf_conntrack_expect_alloc(ct);
+		exp = nf_ct_expect_alloc(ct);
 		if (exp == NULL)
 			return NF_DROP;
 		tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-		nf_conntrack_expect_init(exp, family,
-					 &tuple->src.u3, &tuple->dst.u3,
-					 IPPROTO_UDP,
-					 NULL, &tuple->dst.u.udp.port);
+		nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3,
+				  IPPROTO_UDP, NULL, &tuple->dst.u.udp.port);
 
 		DEBUGP("expect: ");
 		NF_CT_DUMP_TUPLE(&exp->tuple);
@@ -82,9 +80,9 @@ static int tftp_help(struct sk_buff **pskb,
 		nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
 		if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
 			ret = nf_nat_tftp(pskb, ctinfo, exp);
-		else if (nf_conntrack_expect_related(exp) != 0)
+		else if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
-		nf_conntrack_expect_put(exp);
+		nf_ct_expect_put(exp);
 		break;
 	case TFTP_OPCODE_DATA:
 	case TFTP_OPCODE_ACK:
-- 
cgit v0.10.2


From df43b4e7ca46952756b2fc039ed80469b1bff62d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:31:07 -0700
Subject: [NETFILTER]: nf_conntrack_ftp: use nf_ct_expect_init

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 5efe65d..9ad1519 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -364,6 +364,7 @@ static int help(struct sk_buff **pskb,
 	unsigned int matchlen, matchoff;
 	struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info;
 	struct nf_conntrack_expect *exp;
+	union nf_conntrack_address *daddr;
 	struct nf_conntrack_man cmd = {};
 	unsigned int i;
 	int found = 0, ends_in_nl;
@@ -454,7 +455,7 @@ static int help(struct sk_buff **pskb,
 	/* We refer to the reverse direction ("!dir") tuples here,
 	 * because we're expecting something in the other direction.
 	 * Doesn't matter unless NAT is happening.  */
-	exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3;
+	daddr = &ct->tuplehash[!dir].tuple.dst.u3;
 
 	/* Update the ftp info */
 	if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) &&
@@ -483,37 +484,12 @@ static int help(struct sk_buff **pskb,
 			ret = NF_ACCEPT;
 			goto out_put_expect;
 		}
-		memcpy(&exp->tuple.dst.u3, &cmd.u3.all,
-		       sizeof(exp->tuple.dst.u3));
+		daddr = &cmd.u3;
 	}
 
-	exp->tuple.src.u3 = ct->tuplehash[!dir].tuple.src.u3;
-	exp->tuple.src.l3num = cmd.l3num;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.u.tcp.port = cmd.u.tcp.port;
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-
-	exp->mask = (struct nf_conntrack_tuple)
-		    { .src = { .l3num = 0xFFFF,
-			       .u = { .tcp = { 0 }},
-			     },
-		      .dst = { .protonum = 0xFF,
-			       .u = { .tcp = { __constant_htons(0xFFFF) }},
-			     },
-		    };
-	if (cmd.l3num == PF_INET) {
-		exp->mask.src.u3.ip = htonl(0xFFFFFFFF);
-		exp->mask.dst.u3.ip = htonl(0xFFFFFFFF);
-	} else {
-		memset(exp->mask.src.u3.ip6, 0xFF,
-		       sizeof(exp->mask.src.u3.ip6));
-		memset(exp->mask.dst.u3.ip6, 0xFF,
-		       sizeof(exp->mask.src.u3.ip6));
-	}
-
-	exp->expectfn = NULL;
-	exp->helper = NULL;
-	exp->flags = 0;
+	nf_ct_expect_init(exp, cmd.l3num,
+			  &ct->tuplehash[!dir].tuple.src.u3, daddr,
+			  IPPROTO_TCP, NULL, &cmd.u.tcp.port);
 
 	/* Now, NAT might want to mangle the packet, and register the
 	 * (possibly changed) expectation itself. */
-- 
cgit v0.10.2


From d4156e8cd93f5772483928aaf4960120caebd789 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:31:32 -0700
Subject: [NETFILTER]: nf_conntrack: reduce masks to a subset of tuples

Since conntrack currently allows masks to be used for every bit of both
helper and expectation tuples, we can't hash them and have to keep
them on two global lists that are searched for every new connection.

This patch removes the never used ability to use masks for the
destination part of the expectation tuple and completely removes
masks from helpers since the only reasonable choice is a full
match on l3num, protonum and src.u.all.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index c0b1d1f..13643f7 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -16,7 +16,8 @@ struct nf_conntrack_expect
 	struct list_head list;
 
 	/* We expect this tuple, with the following mask */
-	struct nf_conntrack_tuple tuple, mask;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_tuple_mask mask;
 
 	/* Function to call after setup and insertion */
 	void (*expectfn)(struct nf_conn *new,
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index b43a75b..d62e6f0 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -24,10 +24,9 @@ struct nf_conntrack_helper
 					 * expected connections */
 	unsigned int timeout;		/* timeout for expecteds */
 
-	/* Mask of things we will help (compared against server response) */
+	/* Tuple of things we will help (compared against server response) */
 	struct nf_conntrack_tuple tuple;
-	struct nf_conntrack_tuple mask;
-	
+
 	/* Function to call when data passes; return verdict, or -1 to
            invalidate. */
 	int (*help)(struct sk_buff **pskb,
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index d02ce87..99934ab5 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -100,6 +100,14 @@ struct nf_conntrack_tuple
 	} dst;
 };
 
+struct nf_conntrack_tuple_mask
+{
+	struct {
+		union nf_conntrack_address u3;
+		union nf_conntrack_man_proto u;
+	} src;
+};
+
 /* This is optimized opposed to a memset of the whole structure.  Everything we
  * really care about is the  source/destination unions */
 #define NF_CT_TUPLE_U_BLANK(tuple)                              	\
@@ -161,31 +169,44 @@ static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1,
 	return nf_ct_tuple_src_equal(t1, t2) && nf_ct_tuple_dst_equal(t1, t2);
 }
 
+static inline int nf_ct_tuple_mask_equal(const struct nf_conntrack_tuple_mask *m1,
+					 const struct nf_conntrack_tuple_mask *m2)
+{
+	return (m1->src.u3.all[0] == m2->src.u3.all[0] &&
+		m1->src.u3.all[1] == m2->src.u3.all[1] &&
+		m1->src.u3.all[2] == m2->src.u3.all[2] &&
+		m1->src.u3.all[3] == m2->src.u3.all[3] &&
+		m1->src.u.all == m2->src.u.all);
+}
+
+static inline int nf_ct_tuple_src_mask_cmp(const struct nf_conntrack_tuple *t1,
+					   const struct nf_conntrack_tuple *t2,
+					   const struct nf_conntrack_tuple_mask *mask)
+{
+	int count;
+
+	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
+		if ((t1->src.u3.all[count] ^ t2->src.u3.all[count]) &
+		    mask->src.u3.all[count])
+			return 0;
+	}
+
+	if ((t1->src.u.all ^ t2->src.u.all) & mask->src.u.all)
+		return 0;
+
+	if (t1->src.l3num != t2->src.l3num ||
+	    t1->dst.protonum != t2->dst.protonum)
+		return 0;
+
+	return 1;
+}
+
 static inline int nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t,
 				       const struct nf_conntrack_tuple *tuple,
-				       const struct nf_conntrack_tuple *mask)
+				       const struct nf_conntrack_tuple_mask *mask)
 {
-	int count = 0;
-
-        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
-                if ((t->src.u3.all[count] ^ tuple->src.u3.all[count]) &
-                    mask->src.u3.all[count])
-                        return 0;
-        }
-
-        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
-                if ((t->dst.u3.all[count] ^ tuple->dst.u3.all[count]) &
-                    mask->dst.u3.all[count])
-                        return 0;
-        }
-
-        if ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all ||
-            (t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all ||
-            (t->src.l3num ^ tuple->src.l3num) & mask->src.l3num ||
-            (t->dst.protonum ^ tuple->dst.protonum) & mask->dst.protonum)
-                return 0;
-
-        return 1;
+	return nf_ct_tuple_src_mask_cmp(t, tuple, mask) &&
+	       nf_ct_tuple_dst_equal(t, tuple);
 }
 
 #endif /* _NF_CONNTRACK_TUPLE_H */
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 6e88505..6bfcd3a 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1276,9 +1276,6 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = {
 	.tuple.src.l3num	= AF_INET,
 	.tuple.src.u.udp.port	= __constant_htons(SNMP_PORT),
 	.tuple.dst.protonum	= IPPROTO_UDP,
-	.mask.src.l3num		= 0xFFFF,
-	.mask.src.u.udp.port	= __constant_htons(0xFFFF),
-	.mask.dst.protonum	= 0xFF,
 };
 
 static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
@@ -1290,9 +1287,6 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
 	.tuple.src.l3num	= AF_INET,
 	.tuple.src.u.udp.port	= __constant_htons(SNMP_TRAP_PORT),
 	.tuple.dst.protonum	= IPPROTO_UDP,
-	.mask.src.l3num		= 0xFFFF,
-	.mask.src.u.udp.port	= __constant_htons(0xFFFF),
-	.mask.dst.protonum	= 0xFF,
 };
 
 /*****************************************************************************
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index d21359e..e42ab23 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -174,9 +174,6 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
 		.tuple.src.l3num	= AF_INET,
 		.tuple.src.u.udp.port	= __constant_htons(10080),
 		.tuple.dst.protonum	= IPPROTO_UDP,
-		.mask.src.l3num		= 0xFFFF,
-		.mask.src.u.udp.port	= __constant_htons(0xFFFF),
-		.mask.dst.protonum	= 0xFF,
 	},
 	{
 		.name			= "amanda",
@@ -187,9 +184,6 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
 		.tuple.src.l3num	= AF_INET6,
 		.tuple.src.u.udp.port	= __constant_htons(10080),
 		.tuple.dst.protonum	= IPPROTO_UDP,
-		.mask.src.l3num		= 0xFFFF,
-		.mask.src.u.udp.port	= __constant_htons(0xFFFF),
-		.mask.dst.protonum	= 0xFF,
 	},
 };
 
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4130ea6..83b5ad8 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -141,25 +141,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
 {
 	/* Part covered by intersection of masks must be unequal,
 	   otherwise they clash */
-	struct nf_conntrack_tuple intersect_mask;
+	struct nf_conntrack_tuple_mask intersect_mask;
 	int count;
 
-	intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
 	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
-	intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
-	intersect_mask.dst.protonum = a->mask.dst.protonum
-					& b->mask.dst.protonum;
 
 	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
 		intersect_mask.src.u3.all[count] =
 			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
 	}
 
-	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
-		intersect_mask.dst.u3.all[count] =
-			a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
-	}
-
 	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
 }
 
@@ -168,7 +159,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
 {
 	return a->master == b->master
 		&& nf_ct_tuple_equal(&a->tuple, &b->tuple)
-		&& nf_ct_tuple_equal(&a->mask, &b->mask);
+		&& nf_ct_tuple_mask_equal(&a->mask, &b->mask);
 }
 
 /* Generally a bad idea to call this: could have matched already. */
@@ -224,8 +215,6 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
 	exp->helper = NULL;
 	exp->tuple.src.l3num = family;
 	exp->tuple.dst.protonum = proto;
-	exp->mask.src.l3num = 0xFFFF;
-	exp->mask.dst.protonum = 0xFF;
 
 	if (saddr) {
 		memcpy(&exp->tuple.src.u3, saddr, len);
@@ -242,21 +231,6 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
 		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
 	}
 
-	if (daddr) {
-		memcpy(&exp->tuple.dst.u3, daddr, len);
-		if (sizeof(exp->tuple.dst.u3) > len)
-			/* address needs to be cleared for nf_ct_tuple_equal */
-			memset((void *)&exp->tuple.dst.u3 + len, 0x00,
-			       sizeof(exp->tuple.dst.u3) - len);
-		memset(&exp->mask.dst.u3, 0xFF, len);
-		if (sizeof(exp->mask.dst.u3) > len)
-			memset((void *)&exp->mask.dst.u3 + len, 0x00,
-			       sizeof(exp->mask.dst.u3) - len);
-	} else {
-		memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3));
-		memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3));
-	}
-
 	if (src) {
 		exp->tuple.src.u.all = (__force u16)*src;
 		exp->mask.src.u.all = 0xFFFF;
@@ -265,13 +239,13 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
 		exp->mask.src.u.all = 0;
 	}
 
-	if (dst) {
-		exp->tuple.dst.u.all = (__force u16)*dst;
-		exp->mask.dst.u.all = 0xFFFF;
-	} else {
-		exp->tuple.dst.u.all = 0;
-		exp->mask.dst.u.all = 0;
-	}
+	memcpy(&exp->tuple.dst.u3, daddr, len);
+	if (sizeof(exp->tuple.dst.u3) > len)
+		/* address needs to be cleared for nf_ct_tuple_equal */
+		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
+		       sizeof(exp->tuple.dst.u3) - len);
+
+	exp->tuple.dst.u.all = (__force u16)*dst;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
 
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 9ad1519..198330b 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -560,9 +560,6 @@ static int __init nf_conntrack_ftp_init(void)
 		for (j = 0; j < 2; j++) {
 			ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]);
 			ftp[i][j].tuple.dst.protonum = IPPROTO_TCP;
-			ftp[i][j].mask.src.l3num = 0xFFFF;
-			ftp[i][j].mask.src.u.tcp.port = htons(0xFFFF);
-			ftp[i][j].mask.dst.protonum = 0xFF;
 			ftp[i][j].max_expected = 1;
 			ftp[i][j].timeout = 5 * 60;	/* 5 Minutes */
 			ftp[i][j].me = THIS_MODULE;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 61ae90f..8c57b81 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -626,8 +626,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
 	.max_expected		= H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */,
 	.timeout		= 240,
 	.tuple.dst.protonum	= IPPROTO_UDP,
-	.mask.src.u.udp.port	= __constant_htons(0xFFFF),
-	.mask.dst.protonum	= 0xFF,
 	.help			= h245_help
 };
 
@@ -1173,9 +1171,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
 		.tuple.src.l3num	= AF_INET,
 		.tuple.src.u.tcp.port	= __constant_htons(Q931_PORT),
 		.tuple.dst.protonum	= IPPROTO_TCP,
-		.mask.src.l3num		= 0xFFFF,
-		.mask.src.u.tcp.port	= __constant_htons(0xFFFF),
-		.mask.dst.protonum	= 0xFF,
 		.help			= q931_help
 	},
 	{
@@ -1187,9 +1182,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
 		.tuple.src.l3num	= AF_INET6,
 		.tuple.src.u.tcp.port	= __constant_htons(Q931_PORT),
 		.tuple.dst.protonum	= IPPROTO_TCP,
-		.mask.src.l3num		= 0xFFFF,
-		.mask.src.u.tcp.port	= __constant_htons(0xFFFF),
-		.mask.dst.protonum	= 0xFF,
 		.help			= q931_help
 	},
 };
@@ -1751,9 +1743,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
 		.tuple.src.l3num	= AF_INET,
 		.tuple.src.u.udp.port	= __constant_htons(RAS_PORT),
 		.tuple.dst.protonum	= IPPROTO_UDP,
-		.mask.src.l3num		= 0xFFFF,
-		.mask.src.u.udp.port	= __constant_htons(0xFFFF),
-		.mask.dst.protonum	= 0xFF,
 		.help			= ras_help,
 	},
 	{
@@ -1764,9 +1753,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
 		.tuple.src.l3num	= AF_INET6,
 		.tuple.src.u.udp.port	= __constant_htons(RAS_PORT),
 		.tuple.dst.protonum	= IPPROTO_UDP,
-		.mask.src.l3num		= 0xFFFF,
-		.mask.src.u.udp.port	= __constant_htons(0xFFFF),
-		.mask.dst.protonum	= 0xFF,
 		.help			= ras_help,
 	},
 };
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 89a5f73..fdabf82 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -34,9 +34,10 @@ struct nf_conntrack_helper *
 __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_helper *h;
+	struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
 
 	list_for_each_entry(h, &helpers, list) {
-		if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+		if (nf_ct_tuple_src_mask_cmp(tuple, &h->tuple, &mask))
 			return h;
 	}
 	return NULL;
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 79da93e..8c73407 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -239,9 +239,6 @@ static int __init nf_conntrack_irc_init(void)
 		irc[i].tuple.src.l3num = AF_INET;
 		irc[i].tuple.src.u.tcp.port = htons(ports[i]);
 		irc[i].tuple.dst.protonum = IPPROTO_TCP;
-		irc[i].mask.src.l3num = 0xFFFF;
-		irc[i].mask.src.u.tcp.port = htons(0xFFFF);
-		irc[i].mask.dst.protonum = 0xFF;
 		irc[i].max_expected = max_dcc_channels;
 		irc[i].timeout = dcc_timeout;
 		irc[i].me = THIS_MODULE;
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index ea585c7..1d59fab 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -83,9 +83,6 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 
 	exp->mask.src.u3.ip       = mask;
 	exp->mask.src.u.udp.port  = htons(0xFFFF);
-	exp->mask.dst.u3.ip       = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.udp.port  = htons(0xFFFF);
-	exp->mask.dst.protonum    = 0xFF;
 
 	exp->expectfn             = NULL;
 	exp->flags                = NF_CT_EXPECT_PERMANENT;
@@ -104,9 +101,6 @@ static struct nf_conntrack_helper helper __read_mostly = {
 	.tuple.src.l3num	= AF_INET,
 	.tuple.src.u.udp.port	= __constant_htons(NMBD_PORT),
 	.tuple.dst.protonum	= IPPROTO_UDP,
-	.mask.src.l3num		= 0xFFFF,
-	.mask.src.u.udp.port	= __constant_htons(0xFFFF),
-	.mask.dst.protonum	= 0xFF,
 	.max_expected		= 1,
 	.me			= THIS_MODULE,
 	.help			= help,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 954cc58..2064914 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1094,22 +1094,29 @@ nfattr_failure:
 static inline int
 ctnetlink_exp_dump_mask(struct sk_buff *skb,
 			const struct nf_conntrack_tuple *tuple,
-			const struct nf_conntrack_tuple *mask)
+			const struct nf_conntrack_tuple_mask *mask)
 {
 	int ret;
 	struct nf_conntrack_l3proto *l3proto;
 	struct nf_conntrack_l4proto *l4proto;
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK);
+	struct nf_conntrack_tuple m;
+	struct nfattr *nest_parms;
+
+	memset(&m, 0xFF, sizeof(m));
+	m.src.u.all = mask->src.u.all;
+	memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
+
+	nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK);
 
 	l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
-	ret = ctnetlink_dump_tuples_ip(skb, mask, l3proto);
+	ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto);
 	nf_ct_l3proto_put(l3proto);
 
 	if (unlikely(ret < 0))
 		goto nfattr_failure;
 
 	l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
-	ret = ctnetlink_dump_tuples_proto(skb, mask, l4proto);
+	ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
 	nf_ct_l4proto_put(l4proto);
 	if (unlikely(ret < 0))
 		goto nfattr_failure;
@@ -1447,7 +1454,8 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3)
 	exp->master = ct;
 	exp->helper = NULL;
 	memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
-	memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple));
+	memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
+	exp->mask.src.u.all = mask.src.u.all;
 
 	err = nf_ct_expect_related(exp);
 	nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 916e106..63dac5e 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -585,9 +585,6 @@ static struct nf_conntrack_helper pptp __read_mostly = {
 	.tuple.src.l3num	= AF_INET,
 	.tuple.src.u.tcp.port	= __constant_htons(PPTP_CONTROL_PORT),
 	.tuple.dst.protonum	= IPPROTO_TCP,
-	.mask.src.l3num		= 0xffff,
-	.mask.src.u.tcp.port	= __constant_htons(0xffff),
-	.mask.dst.protonum	= 0xff,
 	.help			= conntrack_pptp_help,
 	.destroy		= pptp_destroy_siblings,
 };
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 28ed303..edd10df 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -206,8 +206,6 @@ static int __init nf_conntrack_sane_init(void)
 		for (j = 0; j < 2; j++) {
 			sane[i][j].tuple.src.u.tcp.port = htons(ports[i]);
 			sane[i][j].tuple.dst.protonum = IPPROTO_TCP;
-			sane[i][j].mask.src.u.tcp.port = 0xFFFF;
-			sane[i][j].mask.dst.protonum = 0xFF;
 			sane[i][j].max_expected = 1;
 			sane[i][j].timeout = 5 * 60;	/* 5 Minutes */
 			sane[i][j].me = THIS_MODULE;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1f17f80..5b78f0e 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -506,9 +506,6 @@ static int __init nf_conntrack_sip_init(void)
 		for (j = 0; j < 2; j++) {
 			sip[i][j].tuple.dst.protonum = IPPROTO_UDP;
 			sip[i][j].tuple.src.u.udp.port = htons(ports[i]);
-			sip[i][j].mask.src.l3num = 0xFFFF;
-			sip[i][j].mask.src.u.udp.port = htons(0xFFFF);
-			sip[i][j].mask.dst.protonum = 0xFF;
 			sip[i][j].max_expected = 2;
 			sip[i][j].timeout = 3 * 60; /* 3 minutes */
 			sip[i][j].me = THIS_MODULE;
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index 53d57b4..db0387c 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -126,9 +126,6 @@ static int __init nf_conntrack_tftp_init(void)
 		for (j = 0; j < 2; j++) {
 			tftp[i][j].tuple.dst.protonum = IPPROTO_UDP;
 			tftp[i][j].tuple.src.u.udp.port = htons(ports[i]);
-			tftp[i][j].mask.src.l3num = 0xFFFF;
-			tftp[i][j].mask.dst.protonum = 0xFF;
-			tftp[i][j].mask.src.u.udp.port = htons(0xFFFF);
 			tftp[i][j].max_expected = 1;
 			tftp[i][j].timeout = 5 * 60; /* 5 minutes */
 			tftp[i][j].me = THIS_MODULE;
-- 
cgit v0.10.2


From 4e1d4e6c5a448bd114e0cef6311c974cb7c7385e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:32:03 -0700
Subject: [NETFILTER]: nf_conntrack_expect: avoid useless list walking

Don't walk the list when unexpecting an expectation, we already
have a reference and the timer check is enough to guarantee
that it still is on the list.

This comment suggests that it was copied there by mistake from
expectation eviction:

/* choose the oldest expectation to evict */

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 83b5ad8..5c8cb0f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -165,17 +165,10 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
 /* Generally a bad idea to call this: could have matched already. */
 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
 {
-	struct nf_conntrack_expect *i;
-
 	write_lock_bh(&nf_conntrack_lock);
-	/* choose the oldest expectation to evict */
-	list_for_each_entry_reverse(i, &nf_ct_expect_list, list) {
-		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
-			nf_ct_unlink_expect(i);
-			write_unlock_bh(&nf_conntrack_lock);
-			nf_ct_expect_put(i);
-			return;
-		}
+	if (del_timer(&exp->timeout)) {
+		nf_ct_unlink_expect(exp);
+		nf_ct_expect_put(exp);
 	}
 	write_unlock_bh(&nf_conntrack_lock);
 }
-- 
cgit v0.10.2


From cf6994c2b9812a9f02b99e89df411ffc5db9c779 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:32:34 -0700
Subject: [NETFILTER]: nf_conntrack_netlink: sync expectation dumping with
 conntrack table dumping

Resync expectation table dumping code with conntrack dumping: don't
rely on the unique ID anymore since that requires walking the list
backwards, which doesn't work with the upcoming conversion to hlists.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2064914..65a7ebc 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1235,32 +1235,50 @@ nfattr_failure:
 	return NOTIFY_DONE;
 }
 #endif
+static int ctnetlink_exp_done(struct netlink_callback *cb)
+{
+	if (cb->args[0])
+		nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[0]);
+	return 0;
+}
 
 static int
 ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct nf_conntrack_expect *exp = NULL;
+	struct nf_conntrack_expect *exp, *last;
 	struct list_head *i;
-	u_int32_t *id = (u_int32_t *) &cb->args[0];
 	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
 	u_int8_t l3proto = nfmsg->nfgen_family;
 
 	read_lock_bh(&nf_conntrack_lock);
+	last = (struct nf_conntrack_expect *)cb->args[0];
+restart:
 	list_for_each_prev(i, &nf_ct_expect_list) {
 		exp = (struct nf_conntrack_expect *) i;
 		if (l3proto && exp->tuple.src.l3num != l3proto)
 			continue;
-		if (exp->id <= *id)
-			continue;
+		if (cb->args[0]) {
+			if (exp != last)
+				continue;
+			cb->args[0] = 0;
+		}
 		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
 					    cb->nlh->nlmsg_seq,
 					    IPCTNL_MSG_EXP_NEW,
-					    1, exp) < 0)
+					    1, exp) < 0) {
+			atomic_inc(&exp->use);
+			cb->args[0] = (unsigned long)exp;
 			goto out;
-		*id = exp->id;
+		}
+	}
+	if (cb->args[0]) {
+		cb->args[0] = 0;
+		goto restart;
 	}
 out:
 	read_unlock_bh(&nf_conntrack_lock);
+	if (last)
+		nf_ct_expect_put(last);
 
 	return skb->len;
 }
@@ -1287,7 +1305,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		return netlink_dump_start(ctnl, skb, nlh,
 					  ctnetlink_exp_dump_table,
-					  ctnetlink_done);
+					  ctnetlink_exp_done);
 	}
 
 	if (cda[CTA_EXPECT_MASTER-1])
-- 
cgit v0.10.2


From e9c1b084e17ca225b6be731b819308ee0f9e04b8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:32:53 -0700
Subject: [NETFILTER]: nf_conntrack: move expectation related init code to
 nf_conntrack_expect.c

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 13643f7..cf6a619 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -7,8 +7,6 @@
 #include <net/netfilter/nf_conntrack.h>
 
 extern struct list_head nf_ct_expect_list;
-extern struct kmem_cache *nf_ct_expect_cachep;
-extern const struct file_operations exp_file_ops;
 
 struct nf_conntrack_expect
 {
@@ -53,6 +51,8 @@ struct nf_conntrack_expect
 
 #define NF_CT_EXPECT_PERMANENT 0x1
 
+int nf_conntrack_expect_init(void);
+void nf_conntrack_expect_fini(void);
 
 struct nf_conntrack_expect *
 __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 793f12f..ed44a09 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -961,12 +961,12 @@ void nf_conntrack_cleanup(void)
 	rcu_assign_pointer(nf_ct_destroy, NULL);
 
 	kmem_cache_destroy(nf_conntrack_cachep);
-	kmem_cache_destroy(nf_ct_expect_cachep);
 	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
 			     nf_conntrack_htable_size);
 
 	nf_conntrack_proto_fini();
 	nf_conntrack_helper_fini();
+	nf_conntrack_expect_fini();
 }
 
 struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced)
@@ -1088,21 +1088,17 @@ int __init nf_conntrack_init(void)
 		goto err_free_hash;
 	}
 
-	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
-					sizeof(struct nf_conntrack_expect),
-					0, 0, NULL, NULL);
-	if (!nf_ct_expect_cachep) {
-		printk(KERN_ERR "Unable to create nf_expect slab cache\n");
+	ret = nf_conntrack_proto_init();
+	if (ret < 0)
 		goto err_free_conntrack_slab;
-	}
 
-	ret = nf_conntrack_proto_init();
+	ret = nf_conntrack_expect_init();
 	if (ret < 0)
-		goto out_free_expect_slab;
+		goto out_fini_proto;
 
 	ret = nf_conntrack_helper_init();
 	if (ret < 0)
-		goto out_fini_proto;
+		goto out_fini_expect;
 
 	/* For use by REJECT target */
 	rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
@@ -1116,10 +1112,10 @@ int __init nf_conntrack_init(void)
 
 	return ret;
 
+out_fini_expect:
+	nf_conntrack_expect_fini();
 out_fini_proto:
 	nf_conntrack_proto_fini();
-out_free_expect_slab:
-	kmem_cache_destroy(nf_ct_expect_cachep);
 err_free_conntrack_slab:
 	kmem_cache_destroy(nf_conntrack_cachep);
 err_free_hash:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 5c8cb0f..ad197bc 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -29,7 +29,7 @@
 LIST_HEAD(nf_ct_expect_list);
 EXPORT_SYMBOL_GPL(nf_ct_expect_list);
 
-struct kmem_cache *nf_ct_expect_cachep __read_mostly;
+static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 static unsigned int nf_ct_expect_next_id;
 
 /* nf_conntrack_expect helper functions */
@@ -413,3 +413,49 @@ const struct file_operations exp_file_ops = {
 	.release = seq_release
 };
 #endif /* CONFIG_PROC_FS */
+
+static int __init exp_proc_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *proc;
+
+	proc = proc_net_fops_create("nf_conntrack_expect", 0440, &exp_file_ops);
+	if (!proc)
+		return -ENOMEM;
+#endif /* CONFIG_PROC_FS */
+	return 0;
+}
+
+static void exp_proc_remove(void)
+{
+#ifdef CONFIG_PROC_FS
+	proc_net_remove("nf_conntrack_expect");
+#endif /* CONFIG_PROC_FS */
+}
+
+int __init nf_conntrack_expect_init(void)
+{
+	int err;
+
+	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+					sizeof(struct nf_conntrack_expect),
+					0, 0, NULL, NULL);
+	if (!nf_ct_expect_cachep)
+		return -ENOMEM;
+
+	err = exp_proc_init();
+	if (err < 0)
+		goto err1;
+
+	return 0;
+
+err1:
+	kmem_cache_destroy(nf_ct_expect_cachep);
+	return err;
+}
+
+void nf_conntrack_expect_fini(void)
+{
+	exp_proc_remove();
+	kmem_cache_destroy(nf_ct_expect_cachep);
+}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index fe536b2..098e799 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -411,7 +411,7 @@ EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
 static int __init nf_conntrack_standalone_init(void)
 {
 #ifdef CONFIG_PROC_FS
-	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+	struct proc_dir_entry *proc, *proc_stat;
 #endif
 	int ret = 0;
 
@@ -423,13 +423,9 @@ static int __init nf_conntrack_standalone_init(void)
 	proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops);
 	if (!proc) goto cleanup_init;
 
-	proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440,
-					&exp_file_ops);
-	if (!proc_exp) goto cleanup_proc;
-
 	proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat);
 	if (!proc_stat)
-		goto cleanup_proc_exp;
+		goto cleanup_proc;
 
 	proc_stat->proc_fops = &ct_cpu_seq_fops;
 	proc_stat->owner = THIS_MODULE;
@@ -449,8 +445,6 @@ static int __init nf_conntrack_standalone_init(void)
 #endif
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("nf_conntrack", proc_net_stat);
- cleanup_proc_exp:
-	proc_net_remove("nf_conntrack_expect");
  cleanup_proc:
 	proc_net_remove("nf_conntrack");
  cleanup_init:
@@ -466,7 +460,6 @@ static void __exit nf_conntrack_standalone_fini(void)
 #endif
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("nf_conntrack", proc_net_stat);
-	proc_net_remove("nf_conntrack_expect");
 	proc_net_remove("nf_conntrack");
 #endif /* CNFIG_PROC_FS */
 	nf_conntrack_cleanup();
-- 
cgit v0.10.2


From a71c085562bcc99e8b711cab4222bff1f6e955da Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:33:47 -0700
Subject: [NETFILTER]: nf_conntrack: use hashtable for expectations

Currently all expectations are kept on a global list that

- needs to be searched for every new connection
- needs to be walked for evicting expectations when a master connection
  has reached its limit
- needs to be walked on connection destruction for connections that
  have open expectations

This is obviously not good, especially when considering helpers like
H.323 that register *lots* of expectations and can set up permanent
expectations, but it also allows for an easy DoS against firewalls
using connection tracking helpers.

Use a hashtable for expectations to avoid incurring the search overhead
for every new connection. The default hash size is 1/256 of the conntrack
hash table size; this can be overridden using a module parameter.

This patch only introduces the hash table for expectation lookups and
keeps the other users of the global list to reduce the noise; the
following patches will get rid of the list completely.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index a18f79c..4056f5f 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -84,7 +84,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 	    struct nf_conntrack_l4proto *proto);
 
 extern struct hlist_head *nf_conntrack_hash;
-extern struct list_head nf_ct_expect_list;
 extern rwlock_t nf_conntrack_lock ;
 extern struct hlist_head unconfirmed;
 
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index cf6a619..424d4bd 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -7,12 +7,17 @@
 #include <net/netfilter/nf_conntrack.h>
 
 extern struct list_head nf_ct_expect_list;
+extern struct hlist_head *nf_ct_expect_hash;
+extern unsigned int nf_ct_expect_hsize;
 
 struct nf_conntrack_expect
 {
 	/* Internal linked list (global expectation list) */
 	struct list_head list;
 
+	/* Hash member */
+	struct hlist_node hnode;
+
 	/* We expect this tuple, with the following mask */
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_mask mask;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ad197bc..0696f87 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -19,6 +19,7 @@
 #include <linux/err.h>
 #include <linux/percpu.h>
 #include <linux/kernel.h>
+#include <linux/jhash.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -29,6 +30,17 @@
 LIST_HEAD(nf_ct_expect_list);
 EXPORT_SYMBOL_GPL(nf_ct_expect_list);
 
+struct hlist_head *nf_ct_expect_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
+
+unsigned int nf_ct_expect_hsize __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
+
+static unsigned int nf_ct_expect_hash_rnd __read_mostly;
+static unsigned int nf_ct_expect_count;
+static int nf_ct_expect_hash_rnd_initted __read_mostly;
+static int nf_ct_expect_vmalloc;
+
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 static unsigned int nf_ct_expect_next_id;
 
@@ -41,6 +53,9 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));
 
 	list_del(&exp->list);
+	hlist_del(&exp->hnode);
+	nf_ct_expect_count--;
+
 	NF_CT_STAT_INC(expect_delete);
 	master_help->expecting--;
 	nf_ct_expect_put(exp);
@@ -57,12 +72,31 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
 	nf_ct_expect_put(exp);
 }
 
+static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
+{
+	if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
+		get_random_bytes(&nf_ct_expect_hash_rnd, 4);
+		nf_ct_expect_hash_rnd_initted = 1;
+	}
+
+	return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
+		       tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) %
+	       nf_ct_expect_hsize;
+}
+
 struct nf_conntrack_expect *
 __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
+	struct hlist_node *n;
+	unsigned int h;
+
+	if (!nf_ct_expect_count)
+		return NULL;
 
-	list_for_each_entry(i, &nf_ct_expect_list, list) {
+	h = nf_ct_expect_dst_hash(tuple);
+	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
 		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
 			return i;
 	}
@@ -252,10 +286,14 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put);
 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
+	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
 	atomic_inc(&exp->use);
 	master_help->expecting++;
+
 	list_add(&exp->list, &nf_ct_expect_list);
+	hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]);
+	nf_ct_expect_count++;
 
 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 		    (unsigned long)exp);
@@ -300,6 +338,8 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 	struct nf_conntrack_expect *i;
 	struct nf_conn *master = expect->master;
 	struct nf_conn_help *master_help = nfct_help(master);
+	struct hlist_node *n;
+	unsigned int h;
 	int ret;
 
 	NF_CT_ASSERT(master_help);
@@ -309,7 +349,8 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 		ret = -ESHUTDOWN;
 		goto out;
 	}
-	list_for_each_entry(i, &nf_ct_expect_list, list) {
+	h = nf_ct_expect_dst_hash(&expect->tuple);
+	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
 		if (expect_matches(i, expect)) {
 			/* Refresh timer: if it's dying, ignore.. */
 			if (refresh_timer(i)) {
@@ -433,24 +474,41 @@ static void exp_proc_remove(void)
 #endif /* CONFIG_PROC_FS */
 }
 
+module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
+
 int __init nf_conntrack_expect_init(void)
 {
-	int err;
+	int err = -ENOMEM;
+
+	if (!nf_ct_expect_hsize) {
+		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+		if (!nf_ct_expect_hsize)
+			nf_ct_expect_hsize = 1;
+	}
+
+	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
+						  &nf_ct_expect_vmalloc);
+	if (nf_ct_expect_hash == NULL)
+		goto err1;
 
 	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
 					sizeof(struct nf_conntrack_expect),
 					0, 0, NULL, NULL);
 	if (!nf_ct_expect_cachep)
-		return -ENOMEM;
+		goto err2;
 
 	err = exp_proc_init();
 	if (err < 0)
-		goto err1;
+		goto err3;
 
 	return 0;
 
-err1:
+err3:
 	kmem_cache_destroy(nf_ct_expect_cachep);
+err2:
+	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+			     nf_ct_expect_hsize);
+err1:
 	return err;
 }
 
@@ -458,4 +516,6 @@ void nf_conntrack_expect_fini(void)
 {
 	exp_proc_remove();
 	kmem_cache_destroy(nf_ct_expect_cachep);
+	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+			     nf_ct_expect_hsize);
 }
-- 
cgit v0.10.2


From 5d08ad440feae11b8d6e7599147a8a20ac60f99a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:34:07 -0700
Subject: [NETFILTER]: nf_conntrack_expect: convert proc functions to hash

Convert from the global expectation list to the hash table.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 12d6a63..ab8e4c6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -207,47 +207,68 @@ static const struct file_operations ct_file_ops = {
 };
 
 /* expects */
-static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+struct ct_expect_iter_state {
+	unsigned int bucket;
+};
+
+static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 {
-	struct list_head *e = &nf_ct_expect_list;
-	loff_t i;
+	struct ct_expect_iter_state *st = seq->private;
 
-	/* strange seq_file api calls stop even if we fail,
-	 * thus we need to grab lock since stop unlocks */
-	read_lock_bh(&nf_conntrack_lock);
+	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
+		if (!hlist_empty(&nf_ct_expect_hash[st->bucket]))
+			return nf_ct_expect_hash[st->bucket].first;
+	}
+	return NULL;
+}
 
-	if (list_empty(e))
-		return NULL;
+static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
+					     struct hlist_node *head)
+{
+	struct ct_expect_iter_state *st = seq->private;
 
-	for (i = 0; i <= *pos; i++) {
-		e = e->next;
-		if (e == &nf_ct_expect_list)
+	head = head->next;
+	while (head == NULL) {
+		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
+		head = nf_ct_expect_hash[st->bucket].first;
 	}
-	return e;
+	return head;
 }
 
-static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
 {
-	struct list_head *e = v;
+	struct hlist_node *head = ct_expect_get_first(seq);
 
-	++*pos;
-	e = e->next;
+	if (head)
+		while (pos && (head = ct_expect_get_next(seq, head)))
+			pos--;
+	return pos ? NULL : head;
+}
 
-	if (e == &nf_ct_expect_list)
-		return NULL;
+static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock_bh(&nf_conntrack_lock);
+	return ct_expect_get_idx(seq, *pos);
+}
 
-	return e;
+static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return ct_expect_get_next(seq, v);
 }
 
-static void exp_seq_stop(struct seq_file *s, void *v)
+static void exp_seq_stop(struct seq_file *seq, void *v)
 {
 	read_unlock_bh(&nf_conntrack_lock);
 }
 
 static int exp_seq_show(struct seq_file *s, void *v)
 {
-	struct nf_conntrack_expect *exp = v;
+	struct nf_conntrack_expect *exp;
+	struct hlist_node *n = v;
+
+	exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
 
 	if (exp->tuple.src.l3num != AF_INET)
 		return 0;
@@ -276,7 +297,23 @@ static struct seq_operations exp_seq_ops = {
 
 static int exp_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &exp_seq_ops);
+	struct seq_file *seq;
+	struct ct_expect_iter_state *st;
+	int ret;
+
+	st = kmalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+	ret = seq_open(file, &exp_seq_ops);
+	if (ret)
+		goto out_free;
+	seq          = file->private_data;
+	seq->private = st;
+	memset(st, 0, sizeof(struct ct_expect_iter_state));
+	return ret;
+out_free:
+	kfree(st);
+	return ret;
 }
 
 static const struct file_operations ip_exp_file_ops = {
@@ -284,7 +321,7 @@ static const struct file_operations ip_exp_file_ops = {
 	.open    = exp_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release
+	.release = seq_release_private,
 };
 
 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 0696f87..c5006b0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -377,47 +377,68 @@ out:
 EXPORT_SYMBOL_GPL(nf_ct_expect_related);
 
 #ifdef CONFIG_PROC_FS
-static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+struct ct_expect_iter_state {
+	unsigned int bucket;
+};
+
+static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 {
-	struct list_head *e = &nf_ct_expect_list;
-	loff_t i;
+	struct ct_expect_iter_state *st = seq->private;
 
-	/* strange seq_file api calls stop even if we fail,
-	 * thus we need to grab lock since stop unlocks */
-	read_lock_bh(&nf_conntrack_lock);
+	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
+		if (!hlist_empty(&nf_ct_expect_hash[st->bucket]))
+			return nf_ct_expect_hash[st->bucket].first;
+	}
+	return NULL;
+}
 
-	if (list_empty(e))
-		return NULL;
+static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
+					     struct hlist_node *head)
+{
+	struct ct_expect_iter_state *st = seq->private;
 
-	for (i = 0; i <= *pos; i++) {
-		e = e->next;
-		if (e == &nf_ct_expect_list)
+	head = head->next;
+	while (head == NULL) {
+		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
+		head = nf_ct_expect_hash[st->bucket].first;
 	}
-	return e;
+	return head;
 }
 
-static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
 {
-	struct list_head *e = v;
+	struct hlist_node *head = ct_expect_get_first(seq);
 
-	++*pos;
-	e = e->next;
+	if (head)
+		while (pos && (head = ct_expect_get_next(seq, head)))
+			pos--;
+	return pos ? NULL : head;
+}
 
-	if (e == &nf_ct_expect_list)
-		return NULL;
+static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock_bh(&nf_conntrack_lock);
+	return ct_expect_get_idx(seq, *pos);
+}
 
-	return e;
+static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return ct_expect_get_next(seq, v);
 }
 
-static void exp_seq_stop(struct seq_file *s, void *v)
+static void exp_seq_stop(struct seq_file *seq, void *v)
 {
 	read_unlock_bh(&nf_conntrack_lock);
 }
 
 static int exp_seq_show(struct seq_file *s, void *v)
 {
-	struct nf_conntrack_expect *expect = v;
+	struct nf_conntrack_expect *expect;
+	struct hlist_node *n = v;
+
+	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
 
 	if (expect->timeout.function)
 		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
@@ -443,15 +464,31 @@ static struct seq_operations exp_seq_ops = {
 
 static int exp_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &exp_seq_ops);
+	struct seq_file *seq;
+	struct ct_expect_iter_state *st;
+	int ret;
+
+	st = kmalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+	ret = seq_open(file, &exp_seq_ops);
+	if (ret)
+		goto out_free;
+	seq          = file->private_data;
+	seq->private = st;
+	memset(st, 0, sizeof(struct ct_expect_iter_state));
+	return ret;
+out_free:
+	kfree(st);
+	return ret;
 }
 
-const struct file_operations exp_file_ops = {
+static const struct file_operations exp_file_ops = {
 	.owner   = THIS_MODULE,
 	.open    = exp_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release
+	.release = seq_release_private,
 };
 #endif /* CONFIG_PROC_FS */
 
-- 
cgit v0.10.2


From 31f15875c5ad98a13b528aaf19c839e22b43dc9a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:35:21 -0700
Subject: [NETFILTER]: nf_conntrack_helper/nf_conntrack_netlink: convert to
 expectation hash

Convert from the global expectation list to the hash table.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index fdabf82..cc8ae74 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -114,22 +114,25 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
 
 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 {
-	unsigned int i;
 	struct nf_conntrack_tuple_hash *h;
-	struct nf_conntrack_expect *exp, *tmp;
-	struct hlist_node *n;
+	struct nf_conntrack_expect *exp;
+	struct hlist_node *n, *next;
+	unsigned int i;
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&nf_conntrack_lock);
 	list_del(&me->list);
 
 	/* Get rid of expectations */
-	list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) {
-		struct nf_conn_help *help = nfct_help(exp->master);
-		if ((help->helper == me || exp->helper == me) &&
-		    del_timer(&exp->timeout)) {
-			nf_ct_unlink_expect(exp);
-			nf_ct_expect_put(exp);
+	for (i = 0; i < nf_ct_expect_hsize; i++) {
+		hlist_for_each_entry_safe(exp, n, next,
+					  &nf_ct_expect_hash[i], hnode) {
+			struct nf_conn_help *help = nfct_help(exp->master);
+			if ((help->helper == me || exp->helper == me) &&
+			    del_timer(&exp->timeout)) {
+				nf_ct_unlink_expect(exp);
+				nf_ct_expect_put(exp);
+			}
 		}
 	}
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 65a7ebc..60af9b6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1237,8 +1237,8 @@ nfattr_failure:
 #endif
 static int ctnetlink_exp_done(struct netlink_callback *cb)
 {
-	if (cb->args[0])
-		nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[0]);
+	if (cb->args[1])
+		nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[1]);
 	return 0;
 }
 
@@ -1246,35 +1246,37 @@ static int
 ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nf_conntrack_expect *exp, *last;
-	struct list_head *i;
 	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+	struct hlist_node *n;
 	u_int8_t l3proto = nfmsg->nfgen_family;
 
 	read_lock_bh(&nf_conntrack_lock);
-	last = (struct nf_conntrack_expect *)cb->args[0];
+	last = (struct nf_conntrack_expect *)cb->args[1];
+	for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
 restart:
-	list_for_each_prev(i, &nf_ct_expect_list) {
-		exp = (struct nf_conntrack_expect *) i;
-		if (l3proto && exp->tuple.src.l3num != l3proto)
-			continue;
-		if (cb->args[0]) {
-			if (exp != last)
+		hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]],
+				     hnode) {
+			if (l3proto && exp->tuple.src.l3num != l3proto)
 				continue;
-			cb->args[0] = 0;
+			if (cb->args[1]) {
+				if (exp != last)
+					continue;
+				cb->args[1] = 0;
+			}
+			if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
+						    cb->nlh->nlmsg_seq,
+						    IPCTNL_MSG_EXP_NEW,
+						    1, exp) < 0) {
+				atomic_inc(&exp->use);
+				cb->args[1] = (unsigned long)exp;
+				goto out;
+			}
 		}
-		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
-					    cb->nlh->nlmsg_seq,
-					    IPCTNL_MSG_EXP_NEW,
-					    1, exp) < 0) {
-			atomic_inc(&exp->use);
-			cb->args[0] = (unsigned long)exp;
-			goto out;
+		if (cb->args[1]) {
+			cb->args[1] = 0;
+			goto restart;
 		}
 	}
-	if (cb->args[0]) {
-		cb->args[0] = 0;
-		goto restart;
-	}
 out:
 	read_unlock_bh(&nf_conntrack_lock);
 	if (last)
@@ -1354,11 +1356,13 @@ static int
 ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		     struct nlmsghdr *nlh, struct nfattr *cda[])
 {
-	struct nf_conntrack_expect *exp, *tmp;
+	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_helper *h;
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	struct hlist_node *n, *next;
 	u_int8_t u3 = nfmsg->nfgen_family;
+	unsigned int i;
 	int err;
 
 	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
@@ -1390,6 +1394,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		nf_ct_expect_put(exp);
 	} else if (cda[CTA_EXPECT_HELP_NAME-1]) {
 		char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
+		struct nf_conn_help *m_help;
 
 		/* delete all expectations for this helper */
 		write_lock_bh(&nf_conntrack_lock);
@@ -1398,22 +1403,30 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			write_unlock_bh(&nf_conntrack_lock);
 			return -EINVAL;
 		}
-		list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) {
-			struct nf_conn_help *m_help = nfct_help(exp->master);
-			if (m_help->helper == h
-			    && del_timer(&exp->timeout)) {
-				nf_ct_unlink_expect(exp);
-				nf_ct_expect_put(exp);
+		for (i = 0; i < nf_ct_expect_hsize; i++) {
+			hlist_for_each_entry_safe(exp, n, next,
+						  &nf_ct_expect_hash[i],
+						  hnode) {
+				m_help = nfct_help(exp->master);
+				if (m_help->helper == h
+				    && del_timer(&exp->timeout)) {
+					nf_ct_unlink_expect(exp);
+					nf_ct_expect_put(exp);
+				}
 			}
 		}
 		write_unlock_bh(&nf_conntrack_lock);
 	} else {
 		/* This basically means we have to flush everything*/
 		write_lock_bh(&nf_conntrack_lock);
-		list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) {
-			if (del_timer(&exp->timeout)) {
-				nf_ct_unlink_expect(exp);
-				nf_ct_expect_put(exp);
+		for (i = 0; i < nf_ct_expect_hsize; i++) {
+			hlist_for_each_entry_safe(exp, n, next,
+						  &nf_ct_expect_hash[i],
+						  hnode) {
+				if (del_timer(&exp->timeout)) {
+					nf_ct_unlink_expect(exp);
+					nf_ct_expect_put(exp);
+				}
 			}
 		}
 		write_unlock_bh(&nf_conntrack_lock);
-- 
cgit v0.10.2


From b560580a13b180bc1e3cad7ffbc93388cc39be5d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:35:56 -0700
Subject: [NETFILTER]: nf_conntrack_expect: maintain per conntrack expectation
 list

This patch brings back the per-conntrack expectation list that was
removed around 2.6.10 to avoid walking all expectations on expectation
eviction and conntrack destruction.

As these were the last users of the global expectation list, this patch
also kills that.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 8f2cbb9..d4f02eb 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -82,6 +82,8 @@ struct nf_conn_help {
 
 	union nf_conntrack_help help;
 
+	struct hlist_head expectations;
+
 	/* Current number of expected connections */
 	unsigned int expecting;
 };
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 424d4bd..9d5af4e 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -6,14 +6,13 @@
 #define _NF_CONNTRACK_EXPECT_H
 #include <net/netfilter/nf_conntrack.h>
 
-extern struct list_head nf_ct_expect_list;
 extern struct hlist_head *nf_ct_expect_hash;
 extern unsigned int nf_ct_expect_hsize;
 
 struct nf_conntrack_expect
 {
-	/* Internal linked list (global expectation list) */
-	struct list_head list;
+	/* Conntrack expectation list member */
+	struct hlist_node lnode;
 
 	/* Hash member */
 	struct hlist_node hnode;
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index d62e6f0..2c0e2e0 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -52,6 +52,8 @@ extern void nf_ct_helper_put(struct nf_conntrack_helper *helper);
 extern int nf_conntrack_helper_register(struct nf_conntrack_helper *);
 extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
 
+extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
+
 static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
 {
 	return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ed44a09..d1fc019 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -502,12 +502,9 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
 		conntrack->master = exp->master;
 		if (exp->helper) {
-			help = nf_ct_ext_add(conntrack, NF_CT_EXT_HELPER,
-					     GFP_ATOMIC);
+			help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC);
 			if (help)
 				rcu_assign_pointer(help->helper, exp->helper);
-			else
-				DEBUGP("failed to add helper extension area");
 		}
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
@@ -523,14 +520,9 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 
 		helper = __nf_ct_helper_find(&repl_tuple);
 		if (helper) {
-			help = nf_ct_ext_add(conntrack, NF_CT_EXT_HELPER,
-					     GFP_ATOMIC);
+			help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC);
 			if (help)
-				/* not in hash table yet, so not strictly
-				   necessary */
 				rcu_assign_pointer(help->helper, helper);
-			else
-				DEBUGP("failed to add helper extension area");
 		}
 		NF_CT_STAT_INC(new);
 	}
@@ -721,11 +713,9 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 	}
 
 	if (help == NULL) {
-		help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_ATOMIC);
-		if (help == NULL) {
-			DEBUGP("failed to add helper extension area");
+		help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+		if (help == NULL)
 			goto out;
-		}
 	} else {
 		memset(&help->help, 0, sizeof(help->help));
 	}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index c5006b0..5ef0dd4 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -27,9 +27,6 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
-LIST_HEAD(nf_ct_expect_list);
-EXPORT_SYMBOL_GPL(nf_ct_expect_list);
-
 struct hlist_head *nf_ct_expect_hash __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
 
@@ -52,13 +49,14 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 	NF_CT_ASSERT(master_help);
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));
 
-	list_del(&exp->list);
 	hlist_del(&exp->hnode);
 	nf_ct_expect_count--;
 
-	NF_CT_STAT_INC(expect_delete);
+	hlist_del(&exp->lnode);
 	master_help->expecting--;
 	nf_ct_expect_put(exp);
+
+	NF_CT_STAT_INC(expect_delete);
 }
 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
 
@@ -153,17 +151,18 @@ nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
 /* delete all expectations for this conntrack */
 void nf_ct_remove_expectations(struct nf_conn *ct)
 {
-	struct nf_conntrack_expect *i, *tmp;
 	struct nf_conn_help *help = nfct_help(ct);
+	struct nf_conntrack_expect *exp;
+	struct hlist_node *n, *next;
 
 	/* Optimization: most connection never expect any others. */
 	if (!help || help->expecting == 0)
 		return;
 
-	list_for_each_entry_safe(i, tmp, &nf_ct_expect_list, list) {
-		if (i->master == ct && del_timer(&i->timeout)) {
-			nf_ct_unlink_expect(i);
-			nf_ct_expect_put(i);
+	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
+		if (del_timer(&exp->timeout)) {
+			nf_ct_unlink_expect(exp);
+			nf_ct_expect_put(exp);
 		}
 	}
 }
@@ -289,9 +288,10 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
 	atomic_inc(&exp->use);
+
+	hlist_add_head(&exp->lnode, &master_help->expectations);
 	master_help->expecting++;
 
-	list_add(&exp->list, &nf_ct_expect_list);
 	hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]);
 	nf_ct_expect_count++;
 
@@ -308,16 +308,16 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 /* Race with expectations being used means we could have none to find; OK. */
 static void evict_oldest_expect(struct nf_conn *master)
 {
-	struct nf_conntrack_expect *i;
+	struct nf_conn_help *master_help = nfct_help(master);
+	struct nf_conntrack_expect *exp = NULL;
+	struct hlist_node *n;
 
-	list_for_each_entry_reverse(i, &nf_ct_expect_list, list) {
-		if (i->master == master) {
-			if (del_timer(&i->timeout)) {
-				nf_ct_unlink_expect(i);
-				nf_ct_expect_put(i);
-			}
-			break;
-		}
+	hlist_for_each_entry(exp, n, &master_help->expectations, lnode)
+		; /* nothing */
+
+	if (exp && del_timer(&exp->timeout)) {
+		nf_ct_unlink_expect(exp);
+		nf_ct_expect_put(exp);
 	}
 }
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index cc8ae74..66c209d 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -87,6 +87,19 @@ __nf_conntrack_helper_find_byname(const char *name)
 }
 EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname);
 
+struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+	struct nf_conn_help *help;
+
+	help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp);
+	if (help)
+		INIT_HLIST_HEAD(&help->expectations);
+	else
+		pr_debug("failed to add helper extension area");
+	return help;
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
+
 static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 			 const struct nf_conntrack_helper *me)
 {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 60af9b6..6f89b10 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -868,7 +868,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[])
 		/* need to zero data of old helper */
 		memset(&help->help, 0, sizeof(help->help));
 	} else {
-		help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_KERNEL);
+		help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
 		if (help == NULL)
 			return -ENOMEM;
 	}
@@ -989,7 +989,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 
 	helper = nf_ct_helper_find_get(rtuple);
 	if (helper) {
-		help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_KERNEL);
+		help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
 		if (help == NULL) {
 			nf_ct_helper_put(helper);
 			err = -ENOMEM;
-- 
cgit v0.10.2


From f264a7df08d50bb4a23be6a9aa06940e497ac1c4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:36:24 -0700
Subject: [NETFILTER]: nf_conntrack_expect: introduce nf_conntrack_expect_max
 sysctl

As a last step of preventing DoS by creating lots of expectations, this
patch introduces a global maximum and a sysctl to control it. The default
is initialized to 4 * the expectation hash table size, which results in
1/64 of the default maximum of conntracks.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 9d5af4e..cae1a0d 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -8,6 +8,7 @@
 
 extern struct hlist_head *nf_ct_expect_hash;
 extern unsigned int nf_ct_expect_hsize;
+extern unsigned int nf_ct_expect_max;
 
 struct nf_conntrack_expect
 {
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 5ef0dd4..513828f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -35,6 +35,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
 
 static unsigned int nf_ct_expect_hash_rnd __read_mostly;
 static unsigned int nf_ct_expect_count;
+unsigned int nf_ct_expect_max __read_mostly;
 static int nf_ct_expect_hash_rnd_initted __read_mostly;
 static int nf_ct_expect_vmalloc;
 
@@ -367,6 +368,14 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 	    master_help->expecting >= master_help->helper->max_expected)
 		evict_oldest_expect(master);
 
+	if (nf_ct_expect_count >= nf_ct_expect_max) {
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "nf_conntrack: expectation table full");
+		ret = -EMFILE;
+		goto out;
+	}
+
 	nf_ct_expect_insert(expect);
 	nf_ct_expect_event(IPEXP_NEW, expect);
 	ret = 0;
@@ -522,6 +531,7 @@ int __init nf_conntrack_expect_init(void)
 		if (!nf_ct_expect_hsize)
 			nf_ct_expect_hsize = 1;
 	}
+	nf_ct_expect_max = nf_ct_expect_hsize * 4;
 
 	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
 						  &nf_ct_expect_vmalloc);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 098e799..6af96c6 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -372,7 +372,14 @@ static ctl_table nf_ct_sysctl_table[] = {
 		.extra1		= &log_invalid_proto_min,
 		.extra2		= &log_invalid_proto_max,
 	},
-
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nf_conntrack_expect_max",
+		.data		= &nf_ct_expect_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
-- 
cgit v0.10.2


From b8a7fe6c10511fce10b20efa163123f4041f2550 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:36:46 -0700
Subject: [NETFILTER]: nf_conntrack_helper: use hashtable for conntrack helpers

Eliminate the last global list searched for every new connection.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 2c0e2e0..d04f999 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -15,8 +15,8 @@
 struct module;
 
 struct nf_conntrack_helper
-{	
-	struct list_head list; 		/* Internal use. */
+{
+	struct hlist_node hnode;	/* Internal use. */
 
 	const char *name;		/* name of the module */
 	struct module *me;		/* pointer to self */
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 66c209d..b1179dd 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -28,23 +28,41 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
-static __read_mostly LIST_HEAD(helpers);
+static struct hlist_head *nf_ct_helper_hash __read_mostly;
+static unsigned int nf_ct_helper_hsize __read_mostly;
+static unsigned int nf_ct_helper_count __read_mostly;
+static int nf_ct_helper_vmalloc;
+
+
+/* Stupid hash, but collision free for the default registrations of the
+ * helpers currently in the kernel. */
+static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
+{
+	return (((tuple->src.l3num << 8) | tuple->dst.protonum) ^
+		tuple->src.u.all) % nf_ct_helper_hsize;
+}
 
 struct nf_conntrack_helper *
 __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 {
-	struct nf_conntrack_helper *h;
+	struct nf_conntrack_helper *helper;
 	struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
+	struct hlist_node *n;
+	unsigned int h;
 
-	list_for_each_entry(h, &helpers, list) {
-		if (nf_ct_tuple_src_mask_cmp(tuple, &h->tuple, &mask))
-			return h;
+	if (!nf_ct_helper_count)
+		return NULL;
+
+	h = helper_hash(tuple);
+	hlist_for_each_entry(helper, n, &nf_ct_helper_hash[h], hnode) {
+		if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask))
+			return helper;
 	}
 	return NULL;
 }
 
 struct nf_conntrack_helper *
-nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple)
+nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_helper *helper;
 
@@ -77,12 +95,15 @@ struct nf_conntrack_helper *
 __nf_conntrack_helper_find_byname(const char *name)
 {
 	struct nf_conntrack_helper *h;
+	struct hlist_node *n;
+	unsigned int i;
 
-	list_for_each_entry(h, &helpers, list) {
-		if (!strcmp(h->name, name))
-			return h;
+	for (i = 0; i < nf_ct_helper_hsize; i++) {
+		hlist_for_each_entry(h, n, &nf_ct_helper_hash[i], hnode) {
+			if (!strcmp(h->name, name))
+				return h;
+		}
 	}
-
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname);
@@ -115,10 +136,13 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 
 int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 {
+	unsigned int h = helper_hash(&me->tuple);
+
 	BUG_ON(me->timeout == 0);
 
 	write_lock_bh(&nf_conntrack_lock);
-	list_add(&me->list, &helpers);
+	hlist_add_head(&me->hnode, &nf_ct_helper_hash[h]);
+	nf_ct_helper_count++;
 	write_unlock_bh(&nf_conntrack_lock);
 
 	return 0;
@@ -134,7 +158,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&nf_conntrack_lock);
-	list_del(&me->list);
+	hlist_del(&me->hnode);
+	nf_ct_helper_count--;
 
 	/* Get rid of expectations */
 	for (i = 0; i < nf_ct_expect_hsize; i++) {
@@ -171,10 +196,29 @@ static struct nf_ct_ext_type helper_extend __read_mostly = {
 
 int nf_conntrack_helper_init()
 {
-	return nf_ct_extend_register(&helper_extend);
+	int err;
+
+	nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
+	nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
+						  &nf_ct_helper_vmalloc);
+	if (!nf_ct_helper_hash)
+		return -ENOMEM;
+
+	err = nf_ct_extend_register(&helper_extend);
+	if (err < 0)
+		goto err1;
+
+	return 0;
+
+err1:
+	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
+			     nf_ct_helper_hsize);
+	return err;
 }
 
 void nf_conntrack_helper_fini()
 {
 	nf_ct_extend_unregister(&helper_extend);
+	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
+			     nf_ct_helper_hsize);
 }
-- 
cgit v0.10.2


From ec59a1110aee6846adada8979915cacae64042ce Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:37:03 -0700
Subject: [NETFILTER]: nf_conntrack: mark helpers __read_mostly

Most are __read_mostly already, this changes the remaining ones.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 198330b..cd9c2d0 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -518,8 +518,8 @@ out_update_nl:
 	return ret;
 }
 
-static struct nf_conntrack_helper ftp[MAX_PORTS][2];
-static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")];
+static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly;
+static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")] __read_mostly;
 
 /* don't make this __exit, since it's called from __init ! */
 static void nf_conntrack_ftp_fini(void)
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index edd10df..627eda7 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -166,8 +166,8 @@ out:
 	return ret;
 }
 
-static struct nf_conntrack_helper sane[MAX_PORTS][2];
-static char sane_names[MAX_PORTS][2][sizeof("sane-65535")];
+static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly;
+static char sane_names[MAX_PORTS][2][sizeof("sane-65535")] __read_mostly;
 
 /* don't make this __exit, since it's called from __init ! */
 static void nf_conntrack_sane_fini(void)
-- 
cgit v0.10.2


From 7ae7730fd6d98be1afe8ad9ea77813de607ec970 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:37:38 -0700
Subject: [NETFILTER]: nf_conntrack: early_drop improvement

When the maximum number of conntrack entries is reached and a new
one needs to be allocated, conntrack tries to drop an unassured
connection from the same hash bucket the new conntrack would hash
to. Since with a properly sized hash the average number of entries
per bucket is 1, the chances of actually finding one are not very
good. This patch makes it walk the subsequent hash buckets until an
unassured entry is found or at least 8 entries have been checked.

Based on patch by Vasily Averin <vvs@sw.ru>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d1fc019..472396d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -377,21 +377,30 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
 
+#define NF_CT_EVICTION_RANGE	8
+
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static int early_drop(struct hlist_head *chain)
+static int early_drop(unsigned int hash)
 {
 	/* Use oldest entry, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct = NULL, *tmp;
 	struct hlist_node *n;
+	unsigned int i, cnt = 0;
 	int dropped = 0;
 
 	read_lock_bh(&nf_conntrack_lock);
-	hlist_for_each_entry(h, n, chain, hnode) {
-		tmp = nf_ct_tuplehash_to_ctrack(h);
-		if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
-			ct = tmp;
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
+		hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) {
+			tmp = nf_ct_tuplehash_to_ctrack(h);
+			if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
+				ct = tmp;
+			cnt++;
+		}
+		if (ct || cnt >= NF_CT_EVICTION_RANGE)
+			break;
+		hash = (hash + 1) % nf_conntrack_htable_size;
 	}
 	if (ct)
 		atomic_inc(&ct->ct_general.use);
@@ -425,8 +434,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	if (nf_conntrack_max
 	    && atomic_read(&nf_conntrack_count) > nf_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
-		/* Try dropping from this hash chain. */
-		if (!early_drop(&nf_conntrack_hash[hash])) {
+		if (!early_drop(hash)) {
 			atomic_dec(&nf_conntrack_count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
-- 
cgit v0.10.2


From 3569b621ceba0a9cfb80e24c0bd19fd632ccee25 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:38:07 -0700
Subject: [NETFILTER]: ipt_SAME: add to feature-removal-schedule

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 484250d..062fc2e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -311,3 +311,11 @@ Who:  Tejun Heo <htejun@gmail.com>
 
 ---------------------------
 
+What:	iptables SAME target
+When:	1.1. 2008
+Files:	net/ipv4/netfilter/ipt_SAME.c, include/linux/netfilter_ipv4/ipt_SAME.h
+Why:	Obsolete for multiple years now, NAT core provides the same behaviour.
+	Unfixable broken wrt. 32/64 bit cleanness.
+Who:	Patrick McHardy <kaber@trash.net>
+
+---------------------------
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 46509fa..fa97947 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -230,7 +230,7 @@ config IP_NF_TARGET_NETMAP
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP_NF_TARGET_SAME
-	tristate "SAME target support"
+	tristate "SAME target support (OBSOLETE)"
 	depends on NF_NAT
 	help
 	  This option adds a `SAME' target, which works like the standard SNAT
-- 
cgit v0.10.2


From d3c3f4243e135b3d8c41d98be0cb2f54a4141abf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:38:30 -0700
Subject: [NETFILTER]: ipt_CLUSTERIP: add compat code

Adjust structure size and don't expect pointers passed in from
userspace to be valid. Also replace an enum in an ABI structure
by a fixed size type.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
index d9bceed..daf50be 100644
--- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
+++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
@@ -18,13 +18,13 @@ struct clusterip_config;
 struct ipt_clusterip_tgt_info {
 
 	u_int32_t flags;
-	
+
 	/* only relevant for new ones */
 	u_int8_t clustermac[6];
 	u_int16_t num_total_nodes;
 	u_int16_t num_local_nodes;
 	u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
-	enum clusterip_hashmode hash_mode;
+	u_int32_t hash_mode;
 	u_int32_t hash_initval;
 
 	struct clusterip_config *config;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1cef3b0..1981ace 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -397,23 +397,7 @@ checkentry(const char *tablename,
 	/* FIXME: further sanity checks */
 
 	config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
-	if (config) {
-		if (cipinfo->config != NULL) {
-			/* Case A: This is an entry that gets reloaded, since
-			 * it still has a cipinfo->config pointer. Simply
-			 * increase the entry refcount and return */
-			if (cipinfo->config != config) {
-				printk(KERN_ERR "CLUSTERIP: Reloaded entry "
-				       "has invalid config pointer!\n");
-				return false;
-			}
-		} else {
-			/* Case B: This is a new rule referring to an existing
-			 * clusterip config. */
-			cipinfo->config = config;
-		}
-	} else {
-		/* Case C: This is a completely new clusterip config */
+	if (!config) {
 		if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
 			printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
 			return false;
@@ -440,8 +424,8 @@ checkentry(const char *tablename,
 			}
 			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
 		}
-		cipinfo->config = config;
 	}
+	cipinfo->config = config;
 
 	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
@@ -466,13 +450,30 @@ static void destroy(const struct xt_target *target, void *targinfo)
 	nf_ct_l3proto_module_put(target->family);
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ipt_clusterip_tgt_info
+{
+	u_int32_t	flags;
+	u_int8_t	clustermac[6];
+	u_int16_t	num_total_nodes;
+	u_int16_t	num_local_nodes;
+	u_int16_t	local_nodes[CLUSTERIP_MAX_NODES];
+	u_int32_t	hash_mode;
+	u_int32_t	hash_initval;
+	compat_uptr_t	config;
+};
+#endif /* CONFIG_COMPAT */
+
 static struct xt_target clusterip_tgt __read_mostly = {
 	.name		= "CLUSTERIP",
 	.family		= AF_INET,
 	.target		= target,
-	.targetsize	= sizeof(struct ipt_clusterip_tgt_info),
 	.checkentry	= checkentry,
 	.destroy	= destroy,
+	.targetsize	= sizeof(struct ipt_clusterip_tgt_info),
+#ifdef CONFIG_COMPAT
+	.compatsize	= sizeof(struct compat_ipt_clusterip_tgt_info),
+#endif /* CONFIG_COMPAT */
 	.me		= THIS_MODULE
 };
 
-- 
cgit v0.10.2


From 91e8db80065d655ce1b6d74cadc921671e8d5285 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:38:54 -0700
Subject: [NETFILTER]: nf_conntrack_h323: turn some printks into DEBUGPs

Don't spam the ringbuffer with decoding errors. The only printks remaining
are for dropped packets when we're certain they are H.323.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 8c57b81..aa5ba99 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -163,9 +163,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
 				}
 
 				/* Fragmented TPKT */
-				if (net_ratelimit())
-					printk("nf_ct_h323: "
-					       "fragmented TPKT\n");
+				DEBUGP("nf_ct_h323: fragmented TPKT\n");
 				goto clear_out;
 			}
 
@@ -596,10 +594,9 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff,
 		ret = DecodeMultimediaSystemControlMessage(data, datalen,
 							   &mscm);
 		if (ret < 0) {
-			if (net_ratelimit())
-				printk("nf_ct_h245: decoding error: %s\n",
-				       ret == H323_ERROR_BOUND ?
-				       "out of bound" : "out of range");
+			DEBUGP("nf_ct_h245: decoding error: %s\n",
+			       ret == H323_ERROR_BOUND ?
+			       "out of bound" : "out of range");
 			/* We don't drop when decoding error */
 			break;
 		}
@@ -1137,10 +1134,9 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff,
 		/* Decode Q.931 signal */
 		ret = DecodeQ931(data, datalen, &q931);
 		if (ret < 0) {
-			if (net_ratelimit())
-				printk("nf_ct_q931: decoding error: %s\n",
-				       ret == H323_ERROR_BOUND ?
-				       "out of bound" : "out of range");
+			DEBUGP("nf_ct_q931: decoding error: %s\n",
+			       ret == H323_ERROR_BOUND ?
+			       "out of bound" : "out of range");
 			/* We don't drop when decoding error */
 			break;
 		}
@@ -1711,10 +1707,9 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff,
 	/* Decode RAS message */
 	ret = DecodeRasMessage(data, datalen, &ras);
 	if (ret < 0) {
-		if (net_ratelimit())
-			printk("nf_ct_ras: decoding error: %s\n",
-			       ret == H323_ERROR_BOUND ?
-			       "out of bound" : "out of range");
+		DEBUGP("nf_ct_ras: decoding error: %s\n",
+		       ret == H323_ERROR_BOUND ?
+		       "out of bound" : "out of range");
 		goto accept;
 	}
 
-- 
cgit v0.10.2


From 342b7e3c8a3c84252799c4ac4d9a604b8903d2b4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:39:16 -0700
Subject: [NETFILTER]: xt_helper: use RCU

The ->helper pointer is protected by RCU, no need to take
nf_conntrack_lock. Also remove excessive debugging.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index d03acb0..0a1f4c6 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -22,11 +22,6 @@ MODULE_DESCRIPTION("iptables helper match module");
 MODULE_ALIAS("ipt_helper");
 MODULE_ALIAS("ip6t_helper");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
 
 static bool
 match(const struct sk_buff *skb,
@@ -41,38 +36,28 @@ match(const struct sk_buff *skb,
 	const struct xt_helper_info *info = matchinfo;
 	const struct nf_conn *ct;
 	const struct nf_conn_help *master_help;
+	const struct nf_conntrack_helper *helper;
 	enum ip_conntrack_info ctinfo;
 	bool ret = info->invert;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct) {
-		DEBUGP("xt_helper: Eek! invalid conntrack?\n");
+	if (!ct || !ct->master)
 		return ret;
-	}
-
-	if (!ct->master) {
-		DEBUGP("xt_helper: conntrack %p has no master\n", ct);
-		return ret;
-	}
 
-	read_lock_bh(&nf_conntrack_lock);
 	master_help = nfct_help(ct->master);
-	if (!master_help || !master_help->helper) {
-		DEBUGP("xt_helper: master ct %p has no helper\n",
-			exp->expectant);
-		goto out_unlock;
-	}
+	if (!master_help)
+		return ret;
 
-	DEBUGP("master's name = %s , info->name = %s\n",
-		ct->master->helper->name, info->name);
+	/* rcu_read_lock()ed by nf_hook_slow */
+	helper = rcu_dereference(master_help->helper);
+	if (!helper)
+		return ret;
 
 	if (info->name[0] == '\0')
 		ret = !ret;
 	else
 		ret ^= !strncmp(master_help->helper->name, info->name,
 				strlen(master_help->helper->name));
-out_unlock:
-	read_unlock_bh(&nf_conntrack_lock);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 0d53778e81ac7af266dac8a20cc328328c327112 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:39:38 -0700
Subject: [NETFILTER]: Convert DEBUGP to pr_debug

Convert DEBUGP to pr_debug and fix lots of non-compiling debug statements.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h
index 9d8144a..c93061f 100644
--- a/include/linux/netfilter/nf_conntrack_pptp.h
+++ b/include/linux/netfilter/nf_conntrack_pptp.h
@@ -4,6 +4,8 @@
 
 #include <linux/netfilter/nf_conntrack_common.h>
 
+extern const char *pptp_msg_name[];
+
 /* state of the control session */
 enum pptp_ctrlsess_state {
 	PPTP_SESSION_NONE,			/* no session present */
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 99934ab5..040dae5 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -120,11 +120,11 @@ struct nf_conntrack_tuple_mask
 
 #ifdef __KERNEL__
 
-#define NF_CT_DUMP_TUPLE(tp)						    \
-DEBUGP("tuple %p: %u %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n",	    \
-	(tp), (tp)->src.l3num, (tp)->dst.protonum,			    \
-	NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \
-	NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all))
+#define NF_CT_DUMP_TUPLE(tp)						     \
+pr_debug("tuple %p: %u %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n",	     \
+	 (tp), (tp)->src.l3num, (tp)->dst.protonum,			     \
+	 NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \
+	 NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all))
 
 /* If we're the first tuple, it's the original dir. */
 #define NF_CT_DIRECTION(h)						\
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1981ace..8bacda3 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -30,14 +30,6 @@
 
 #define CLUSTERIP_VERSION "0.8"
 
-#define DEBUG_CLUSTERIP
-
-#ifdef DEBUG_CLUSTERIP
-#define DEBUGP	printk
-#else
-#define DEBUGP
-#endif
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("iptables target for CLUSTERIP");
@@ -351,15 +343,15 @@ target(struct sk_buff **pskb,
 			break;
 	}
 
-#ifdef DEBUG_CLUSTERP
+#ifdef DEBUG
 	DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 #endif
-	DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
+	pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
 	if (!clusterip_responsible(cipinfo->config, hash)) {
-		DEBUGP("not responsible\n");
+		pr_debug("not responsible\n");
 		return NF_DROP;
 	}
-	DEBUGP("responsible\n");
+	pr_debug("responsible\n");
 
 	/* despite being received via linklayer multicast, this is
 	 * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
@@ -490,7 +482,7 @@ struct arp_payload {
 	__be32 dst_ip;
 } __attribute__ ((packed));
 
-#ifdef CLUSTERIP_DEBUG
+#ifdef DEBUG
 static void arp_print(struct arp_payload *payload)
 {
 #define HBUFFERLEN 30
@@ -546,8 +538,9 @@ arp_mangle(unsigned int hook,
 	 * this wouldn't work, since we didn't subscribe the mcast group on
 	 * other interfaces */
 	if (c->dev != out) {
-		DEBUGP("CLUSTERIP: not mangling arp reply on different "
-		       "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name);
+		pr_debug("CLUSTERIP: not mangling arp reply on different "
+			 "interface: cip'%s'-skb'%s'\n",
+			 c->dev->name, out->name);
 		clusterip_config_put(c);
 		return NF_ACCEPT;
 	}
@@ -555,8 +548,8 @@ arp_mangle(unsigned int hook,
 	/* mangle reply hardware address */
 	memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
 
-#ifdef CLUSTERIP_DEBUG
-	DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: ");
+#ifdef DEBUG
+	pr_debug("CLUSTERIP mangled arp reply: ");
 	arp_print(payload);
 #endif
 
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 9bfce61..5937ad1 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -27,12 +27,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables syslog logging module");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Use lock to serialize, so printks don't overlap */
 static DEFINE_SPINLOCK(log_lock);
 
@@ -452,12 +446,12 @@ static bool ipt_log_checkentry(const char *tablename,
 	const struct ipt_log_info *loginfo = targinfo;
 
 	if (loginfo->level >= 8) {
-		DEBUGP("LOG: level %u >= 8\n", loginfo->level);
+		pr_debug("LOG: level %u >= 8\n", loginfo->level);
 		return false;
 	}
 	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
-		DEBUGP("LOG: prefix term %i\n",
-		       loginfo->prefix[sizeof(loginfo->prefix)-1]);
+		pr_debug("LOG: prefix term %i\n",
+			 loginfo->prefix[sizeof(loginfo->prefix)-1]);
 		return false;
 	}
 	return true;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index bc033e0..7c4e4be 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -27,12 +27,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables MASQUERADE target module");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Lock protects masq region inside conntrack */
 static DEFINE_RWLOCK(masq_lock);
 
@@ -47,11 +41,11 @@ masquerade_check(const char *tablename,
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
-		DEBUGP("masquerade_check: bad MAP_IPS.\n");
+		pr_debug("masquerade_check: bad MAP_IPS.\n");
 		return false;
 	}
 	if (mr->rangesize != 1) {
-		DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize);
+		pr_debug("masquerade_check: bad rangesize %u\n", mr->rangesize);
 		return false;
 	}
 	return true;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 0a7ce15..41a011d 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -18,17 +18,10 @@
 #include <linux/netfilter/x_tables.h>
 #include <net/netfilter/nf_nat_rule.h>
 
-#define MODULENAME "NETMAP"
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
 MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static bool
 check(const char *tablename,
       const void *e,
@@ -39,11 +32,11 @@ check(const char *tablename,
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
 	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
-		DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
+		pr_debug("NETMAP:check: bad MAP_IPS.\n");
 		return false;
 	}
 	if (mr->rangesize != 1) {
-		DEBUGP(MODULENAME":check: bad rangesize %u.\n", mr->rangesize);
+		pr_debug("NETMAP:check: bad rangesize %u.\n", mr->rangesize);
 		return false;
 	}
 	return true;
@@ -86,7 +79,7 @@ target(struct sk_buff **pskb,
 }
 
 static struct xt_target target_module __read_mostly = {
-	.name 		= MODULENAME,
+	.name 		= "NETMAP",
 	.family		= AF_INET,
 	.target 	= target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 61e1e47..6ac7a23 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -25,12 +25,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables REDIRECT target module");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* FIXME: Take multiple ranges --RR */
 static bool
 redirect_check(const char *tablename,
@@ -42,11 +36,11 @@ redirect_check(const char *tablename,
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
-		DEBUGP("redirect_check: bad MAP_IPS.\n");
+		pr_debug("redirect_check: bad MAP_IPS.\n");
 		return false;
 	}
 	if (mr->rangesize != 1) {
-		DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize);
+		pr_debug("redirect_check: bad rangesize %u.\n", mr->rangesize);
 		return false;
 	}
 	return true;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index dd5432c..cb038c8 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -31,12 +31,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables REJECT target module");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Send RST reply */
 static void send_reset(struct sk_buff *oldskb, int hook)
 {
@@ -227,13 +221,13 @@ static bool check(const char *tablename,
 	const struct ipt_entry *e = e_void;
 
 	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
-		printk("REJECT: ECHOREPLY no longer supported.\n");
+		printk("ipt_REJECT: ECHOREPLY no longer supported.\n");
 		return false;
 	} else if (rejinfo->with == IPT_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ip.proto != IPPROTO_TCP
 		    || (e->ip.invflags & XT_INV_PROTO)) {
-			DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n");
+			printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n");
 			return false;
 		}
 	}
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 3a0d7da..97641f1 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -27,12 +27,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>");
 MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static bool
 same_check(const char *tablename,
 	      const void *e,
@@ -46,54 +40,52 @@ same_check(const char *tablename,
 	mr->ipnum = 0;
 
 	if (mr->rangesize < 1) {
-		DEBUGP("same_check: need at least one dest range.\n");
+		pr_debug("same_check: need at least one dest range.\n");
 		return false;
 	}
 	if (mr->rangesize > IPT_SAME_MAX_RANGE) {
-		DEBUGP("same_check: too many ranges specified, maximum "
-				"is %u ranges\n",
-				IPT_SAME_MAX_RANGE);
+		pr_debug("same_check: too many ranges specified, maximum "
+			 "is %u ranges\n", IPT_SAME_MAX_RANGE);
 		return false;
 	}
 	for (count = 0; count < mr->rangesize; count++) {
 		if (ntohl(mr->range[count].min_ip) >
 				ntohl(mr->range[count].max_ip)) {
-			DEBUGP("same_check: min_ip is larger than max_ip in "
-				"range `%u.%u.%u.%u-%u.%u.%u.%u'.\n",
-				NIPQUAD(mr->range[count].min_ip),
-				NIPQUAD(mr->range[count].max_ip));
+			pr_debug("same_check: min_ip is larger than max_ip in "
+				 "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n",
+				 NIPQUAD(mr->range[count].min_ip),
+				 NIPQUAD(mr->range[count].max_ip));
 			return false;
 		}
 		if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) {
-			DEBUGP("same_check: bad MAP_IPS.\n");
+			pr_debug("same_check: bad MAP_IPS.\n");
 			return false;
 		}
 		rangeip = (ntohl(mr->range[count].max_ip) -
 					ntohl(mr->range[count].min_ip) + 1);
 		mr->ipnum += rangeip;
 
-		DEBUGP("same_check: range %u, ipnum = %u\n", count, rangeip);
+		pr_debug("same_check: range %u, ipnum = %u\n", count, rangeip);
 	}
-	DEBUGP("same_check: total ipaddresses = %u\n", mr->ipnum);
+	pr_debug("same_check: total ipaddresses = %u\n", mr->ipnum);
 
 	mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL);
 	if (!mr->iparray) {
-		DEBUGP("same_check: Couldn't allocate %u bytes "
-			"for %u ipaddresses!\n",
-			(sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
+		pr_debug("same_check: Couldn't allocate %Zu bytes "
+			 "for %u ipaddresses!\n",
+			 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
 		return false;
 	}
-	DEBUGP("same_check: Allocated %u bytes for %u ipaddresses.\n",
-			(sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
+	pr_debug("same_check: Allocated %Zu bytes for %u ipaddresses.\n",
+		 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
 
 	for (count = 0; count < mr->rangesize; count++) {
 		for (countess = ntohl(mr->range[count].min_ip);
 				countess <= ntohl(mr->range[count].max_ip);
 					countess++) {
 			mr->iparray[index] = countess;
-			DEBUGP("same_check: Added ipaddress `%u.%u.%u.%u' "
-				"in index %u.\n",
-				HIPQUAD(countess), index);
+			pr_debug("same_check: Added ipaddress `%u.%u.%u.%u' "
+				 "in index %u.\n", HIPQUAD(countess), index);
 			index++;
 		}
 	}
@@ -107,8 +99,8 @@ same_destroy(const struct xt_target *target, void *targinfo)
 
 	kfree(mr->iparray);
 
-	DEBUGP("same_destroy: Deallocated %u bytes for %u ipaddresses.\n",
-			(sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
+	pr_debug("same_destroy: Deallocated %Zu bytes for %u ipaddresses.\n",
+		 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
 }
 
 static unsigned int
@@ -146,10 +138,9 @@ same_target(struct sk_buff **pskb,
 
 	new_ip = htonl(same->iparray[aindex]);
 
-	DEBUGP("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, "
-			"new src=%u.%u.%u.%u\n",
-			NIPQUAD(t->src.ip), NIPQUAD(t->dst.ip),
-			NIPQUAD(new_ip));
+	pr_debug("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, "
+		 "new src=%u.%u.%u.%u\n",
+		 NIPQUAD(t->src.u3.ip), NIPQUAD(t->dst.u3.ip), NIPQUAD(new_ip));
 
 	/* Transfer from original range. */
 	newrange = ((struct nf_nat_range)
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 226750d..6ca43e4 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -55,13 +55,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
 #define ULOG_NL_EVENT		111		/* Harald's favorite number */
 #define ULOG_MAXNLGROUPS	32		/* numer of nlgroups */
 
-#if 0
-#define DEBUGP(format, args...) printk("%s:%s:" format, \
-				       __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
 #define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
 
 static unsigned int nlbufsiz = NLMSG_GOODSIZE;
@@ -96,12 +89,12 @@ static void ulog_send(unsigned int nlgroupnum)
 	ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
 
 	if (timer_pending(&ub->timer)) {
-		DEBUGP("ipt_ULOG: ulog_send: timer was pending, deleting\n");
+		pr_debug("ipt_ULOG: ulog_send: timer was pending, deleting\n");
 		del_timer(&ub->timer);
 	}
 
 	if (!ub->skb) {
-		DEBUGP("ipt_ULOG: ulog_send: nothing to send\n");
+		pr_debug("ipt_ULOG: ulog_send: nothing to send\n");
 		return;
 	}
 
@@ -110,8 +103,8 @@ static void ulog_send(unsigned int nlgroupnum)
 		ub->lastnlh->nlmsg_type = NLMSG_DONE;
 
 	NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
-	DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n",
-		ub->qlen, nlgroupnum + 1);
+	pr_debug("ipt_ULOG: throwing %d packets to netlink group %u\n",
+		 ub->qlen, nlgroupnum + 1);
 	netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
 
 	ub->qlen = 0;
@@ -123,7 +116,7 @@ static void ulog_send(unsigned int nlgroupnum)
 /* timer function to flush queue in flushtimeout time */
 static void ulog_timer(unsigned long data)
 {
-	DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n");
+	pr_debug("ipt_ULOG: timer function called, calling ulog_send\n");
 
 	/* lock to protect against somebody modifying our structure
 	 * from ipt_ulog_target at the same time */
@@ -204,8 +197,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
 			goto alloc_failure;
 	}
 
-	DEBUGP("ipt_ULOG: qlen %d, qthreshold %d\n", ub->qlen,
-		loginfo->qthreshold);
+	pr_debug("ipt_ULOG: qlen %d, qthreshold %Zu\n", ub->qlen,
+		 loginfo->qthreshold);
 
 	/* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */
 	nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
@@ -334,13 +327,13 @@ static bool ipt_ulog_checkentry(const char *tablename,
 	const struct ipt_ulog_info *loginfo = targinfo;
 
 	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
-		DEBUGP("ipt_ULOG: prefix term %i\n",
-		       loginfo->prefix[sizeof(loginfo->prefix) - 1]);
+		pr_debug("ipt_ULOG: prefix term %i\n",
+			 loginfo->prefix[sizeof(loginfo->prefix) - 1]);
 		return false;
 	}
 	if (loginfo->qthreshold > ULOG_MAX_QLEN) {
-		DEBUGP("ipt_ULOG: queue threshold %i > MAX_QLEN\n",
-			loginfo->qthreshold);
+		pr_debug("ipt_ULOG: queue threshold %Zu > MAX_QLEN\n",
+			 loginfo->qthreshold);
 		return false;
 	}
 	return true;
@@ -405,7 +398,7 @@ static int __init ipt_ulog_init(void)
 {
 	int ret, i;
 
-	DEBUGP("ipt_ULOG: init module\n");
+	pr_debug("ipt_ULOG: init module\n");
 
 	if (nlbufsiz > 128*1024) {
 		printk("Netlink buffer has to be <= 128kB\n");
@@ -437,7 +430,7 @@ static void __exit ipt_ulog_fini(void)
 	ulog_buff_t *ub;
 	int i;
 
-	DEBUGP("ipt_ULOG: cleanup_module\n");
+	pr_debug("ipt_ULOG: cleanup_module\n");
 
 	if (nflog)
 		nf_log_unregister(&ipt_ulog_logger);
@@ -448,7 +441,7 @@ static void __exit ipt_ulog_fini(void)
 	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
 		ub = &ulog_buffers[i];
 		if (timer_pending(&ub->timer)) {
-			DEBUGP("timer was pending, deleting\n");
+			pr_debug("timer was pending, deleting\n");
 			del_timer(&ub->timer);
 		}
 
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index 6a3a033..0106dc9 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -17,12 +17,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
 MODULE_DESCRIPTION("iptables arbitrary IP range match module");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static bool
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -38,12 +32,12 @@ match(const struct sk_buff *skb,
 		if ((ntohl(iph->saddr) < ntohl(info->src.min_ip)
 			  || ntohl(iph->saddr) > ntohl(info->src.max_ip))
 			 ^ !!(info->flags & IPRANGE_SRC_INV)) {
-			DEBUGP("src IP %u.%u.%u.%u NOT in range %s"
-			       "%u.%u.%u.%u-%u.%u.%u.%u\n",
-				NIPQUAD(iph->saddr),
-				info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
-				NIPQUAD(info->src.min_ip),
-				NIPQUAD(info->src.max_ip));
+			pr_debug("src IP %u.%u.%u.%u NOT in range %s"
+				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				 NIPQUAD(iph->saddr),
+				 info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
+				 NIPQUAD(info->src.min_ip),
+				 NIPQUAD(info->src.max_ip));
 			return false;
 		}
 	}
@@ -51,12 +45,12 @@ match(const struct sk_buff *skb,
 		if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip)
 			  || ntohl(iph->daddr) > ntohl(info->dst.max_ip))
 			 ^ !!(info->flags & IPRANGE_DST_INV)) {
-			DEBUGP("dst IP %u.%u.%u.%u NOT in range %s"
-			       "%u.%u.%u.%u-%u.%u.%u.%u\n",
-				NIPQUAD(iph->daddr),
-				info->flags & IPRANGE_DST_INV ? "(INV) " : "",
-				NIPQUAD(info->dst.min_ip),
-				NIPQUAD(info->dst.max_ip));
+			pr_debug("dst IP %u.%u.%u.%u NOT in range %s"
+				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				 NIPQUAD(iph->daddr),
+				 info->flags & IPRANGE_DST_INV ? "(INV) " : "",
+				 NIPQUAD(info->dst.min_ip),
+				 NIPQUAD(info->dst.max_ip));
 			return false;
 		}
 	}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index a103f59..3c56299 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -24,12 +24,6 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
 			     struct nf_conntrack_tuple *tuple)
 {
@@ -324,13 +318,13 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 
 	/* We only do TCP at the moment: is there a better way? */
 	if (strcmp(sk->sk_prot->name, "TCP")) {
-		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
+		pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n");
 		return -ENOPROTOOPT;
 	}
 
 	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
-		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
-		       *len, sizeof(struct sockaddr_in));
+		pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n",
+			 *len, sizeof(struct sockaddr_in));
 		return -EINVAL;
 	}
 
@@ -346,17 +340,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 			.tuple.dst.u3.ip;
 		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 
-		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
-		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+		pr_debug("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
+			 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
 		nf_ct_put(ct);
 		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
 			return -EFAULT;
 		else
 			return 0;
 	}
-	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
-	       NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
-	       NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
+	pr_debug("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
+		 NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
+		 NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
 	return -ENOENT;
 }
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index ab8e4c6..434e084 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -18,12 +18,6 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 #ifdef CONFIG_NF_CT_ACCT
 static unsigned int
 seq_print_counters(struct seq_file *s,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 91fb277..0fe8fb0 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -21,12 +21,6 @@
 
 static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static int icmp_pkt_to_tuple(const struct sk_buff *skb,
 			     unsigned int dataoff,
 			     struct nf_conntrack_tuple *tuple)
@@ -125,8 +119,8 @@ static int icmp_new(struct nf_conn *conntrack,
 	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
 	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
 		/* Can't create a new ICMP `conn' with this. */
-		DEBUGP("icmp: can't create new conn with type %u\n",
-		       conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+		pr_debug("icmp: can't create new conn with type %u\n",
+			 conntrack->tuplehash[0].tuple.dst.u.icmp.type);
 		NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
 		return 0;
 	}
@@ -159,8 +153,8 @@ icmp_error_message(struct sk_buff *skb,
 
 	/* Ignore ICMP's containing fragments (shouldn't happen) */
 	if (inside->ip.frag_off & htons(IP_OFFSET)) {
-		DEBUGP("icmp_error_message: fragment of proto %u\n",
-		       inside->ip.protocol);
+		pr_debug("icmp_error_message: fragment of proto %u\n",
+			 inside->ip.protocol);
 		return -NF_ACCEPT;
 	}
 
@@ -172,8 +166,8 @@ icmp_error_message(struct sk_buff *skb,
 	if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
 			     inside->ip.protocol, &origtuple,
 			     &nf_conntrack_l3proto_ipv4, innerproto)) {
-		DEBUGP("icmp_error_message: ! get_tuple p=%u",
-		       inside->ip.protocol);
+		pr_debug("icmp_error_message: ! get_tuple p=%u",
+			 inside->ip.protocol);
 		return -NF_ACCEPT;
 	}
 
@@ -181,7 +175,7 @@ icmp_error_message(struct sk_buff *skb,
 	   been preserved inside the ICMP. */
 	if (!nf_ct_invert_tuple(&innertuple, &origtuple,
 				&nf_conntrack_l3proto_ipv4, innerproto)) {
-		DEBUGP("icmp_error_message: no match\n");
+		pr_debug("icmp_error_message: no match\n");
 		return -NF_ACCEPT;
 	}
 
@@ -196,7 +190,7 @@ icmp_error_message(struct sk_buff *skb,
 			h = nf_conntrack_find_get(&origtuple);
 
 		if (!h) {
-			DEBUGP("icmp_error_message: no match\n");
+			pr_debug("icmp_error_message: no match\n");
 			return -NF_ACCEPT;
 		}
 
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index f242ac6..e848d8d 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -31,12 +31,6 @@
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static DEFINE_RWLOCK(nf_nat_lock);
 
 static struct nf_conntrack_l3proto *l3proto = NULL;
@@ -242,7 +236,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   manips not an issue.  */
 	if (maniptype == IP_NAT_MANIP_SRC) {
 		if (find_appropriate_src(orig_tuple, tuple, range)) {
-			DEBUGP("get_unique_tuple: Found current src map\n");
+			pr_debug("get_unique_tuple: Found current src map\n");
 			if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
 				if (!nf_nat_used_tuple(tuple, ct))
 					return;
@@ -293,7 +287,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 	if (!nat) {
 		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
 		if (nat == NULL) {
-			DEBUGP("failed to add NAT extension\n");
+			pr_debug("failed to add NAT extension\n");
 			return NF_ACCEPT;
 		}
 	}
@@ -462,8 +456,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 			return 0;
 	}
 
-	DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
-	       *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
+	pr_debug("icmp_reply_translation: translating error %p manip %u "
+		 "dir %s\n", *pskb, manip,
+		 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
 
 	/* rcu_read_lock()ed by nf_hook_slow */
 	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index cae4b46..3663bd8 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -25,12 +25,6 @@ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
 MODULE_DESCRIPTION("ftp NAT helper");
 MODULE_ALIAS("ip_nat_ftp");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* FIXME: Time out? --RR */
 
 static int
@@ -47,7 +41,7 @@ mangle_rfc959_packet(struct sk_buff **pskb,
 	sprintf(buffer, "%u,%u,%u,%u,%u,%u",
 		NIPQUAD(newip), port>>8, port&0xFF);
 
-	DEBUGP("calling nf_nat_mangle_tcp_packet\n");
+	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
 	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
 					matchlen, buffer, strlen(buffer));
@@ -67,7 +61,7 @@ mangle_eprt_packet(struct sk_buff **pskb,
 
 	sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
 
-	DEBUGP("calling nf_nat_mangle_tcp_packet\n");
+	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
 	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
 					matchlen, buffer, strlen(buffer));
@@ -87,7 +81,7 @@ mangle_epsv_packet(struct sk_buff **pskb,
 
 	sprintf(buffer, "|||%u|", port);
 
-	DEBUGP("calling nf_nat_mangle_tcp_packet\n");
+	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
 	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
 					matchlen, buffer, strlen(buffer));
@@ -117,7 +111,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb,
 	int dir = CTINFO2DIR(ctinfo);
 	struct nf_conn *ct = exp->master;
 
-	DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
+	pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
 
 	/* Connection will come from wherever this packet goes, hence !dir */
 	newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 3d760dd..c1b059a 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -21,12 +21,6 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_h323.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /****************************************************************************/
 static int set_addr(struct sk_buff **pskb,
 		    unsigned char **data, int dataoff,
@@ -126,12 +120,11 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
 				    (ntohl(addr.ip) & 0xff000000) == 0x7f000000)
 					i = 0;
 
-				DEBUGP
-				    ("nf_nat_ras: set signal address "
-				     "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				     NIPQUAD(ip), port,
-				     NIPQUAD(ct->tuplehash[!dir].tuple.dst.
-					     ip), info->sig_port[!dir]);
+				pr_debug("nf_nat_ras: set signal address "
+					 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+					 NIPQUAD(addr.ip), port,
+					 NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
+					 info->sig_port[!dir]);
 				return set_h225_addr(pskb, data, 0, &taddr[i],
 						     &ct->tuplehash[!dir].
 						     tuple.dst.u3,
@@ -139,12 +132,11 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
 			} else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
 				   port == info->sig_port[dir]) {
 				/* GK->GW */
-				DEBUGP
-				    ("nf_nat_ras: set signal address "
-				     "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				     NIPQUAD(ip), port,
-				     NIPQUAD(ct->tuplehash[!dir].tuple.src.
-					     ip), info->sig_port[!dir]);
+				pr_debug("nf_nat_ras: set signal address "
+					 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+					 NIPQUAD(addr.ip), port,
+					 NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip),
+					 info->sig_port[!dir]);
 				return set_h225_addr(pskb, data, 0, &taddr[i],
 						     &ct->tuplehash[!dir].
 						     tuple.src.u3,
@@ -171,12 +163,11 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
 		if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) &&
 		    addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
 		    port == ct->tuplehash[dir].tuple.src.u.udp.port) {
-			DEBUGP("nf_nat_ras: set rasAddress "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(ip), ntohs(port),
-			       NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
-			       ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.
-				     port));
+			pr_debug("nf_nat_ras: set rasAddress "
+				 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+				 NIPQUAD(addr.ip), ntohs(port),
+				 NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
+				 ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
 			return set_h225_addr(pskb, data, 0, &taddr[i],
 					     &ct->tuplehash[!dir].tuple.dst.u3,
 					     ct->tuplehash[!dir].tuple.
@@ -267,16 +258,16 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	/* Success */
-	DEBUGP("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(rtp_exp->tuple.src.ip),
-	       ntohs(rtp_exp->tuple.src.u.udp.port),
-	       NIPQUAD(rtp_exp->tuple.dst.ip),
-	       ntohs(rtp_exp->tuple.dst.u.udp.port));
-	DEBUGP("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(rtcp_exp->tuple.src.ip),
-	       ntohs(rtcp_exp->tuple.src.u.udp.port),
-	       NIPQUAD(rtcp_exp->tuple.dst.ip),
-	       ntohs(rtcp_exp->tuple.dst.u.udp.port));
+	pr_debug("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(rtp_exp->tuple.src.u3.ip),
+		 ntohs(rtp_exp->tuple.src.u.udp.port),
+		 NIPQUAD(rtp_exp->tuple.dst.u3.ip),
+		 ntohs(rtp_exp->tuple.dst.u.udp.port));
+	pr_debug("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(rtcp_exp->tuple.src.u3.ip),
+		 ntohs(rtcp_exp->tuple.src.u.udp.port),
+		 NIPQUAD(rtcp_exp->tuple.dst.u3.ip),
+		 ntohs(rtcp_exp->tuple.dst.u.udp.port));
 
 	return 0;
 }
@@ -317,9 +308,11 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
 		return -1;
 	}
 
-	DEBUGP("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
+	pr_debug("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(exp->tuple.src.u3.ip),
+		 ntohs(exp->tuple.src.u.tcp.port),
+		 NIPQUAD(exp->tuple.dst.u3.ip),
+		 ntohs(exp->tuple.dst.u.tcp.port));
 
 	return 0;
 }
@@ -369,9 +362,11 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
 		return -1;
 	}
 
-	DEBUGP("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
+	pr_debug("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(exp->tuple.src.u3.ip),
+		 ntohs(exp->tuple.src.u.tcp.port),
+		 NIPQUAD(exp->tuple.dst.u3.ip),
+		 ntohs(exp->tuple.dst.u.tcp.port));
 
 	return 0;
 }
@@ -465,9 +460,11 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	/* Success */
-	DEBUGP("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
+	pr_debug("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(exp->tuple.src.u3.ip),
+		 ntohs(exp->tuple.src.u.tcp.port),
+		 NIPQUAD(exp->tuple.dst.u3.ip),
+		 ntohs(exp->tuple.dst.u.tcp.port));
 
 	return 0;
 }
@@ -536,10 +533,12 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	/* Success */
-	DEBUGP("nf_nat_q931: expect Call Forwarding "
-	       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
+	pr_debug("nf_nat_q931: expect Call Forwarding "
+		 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(exp->tuple.src.u3.ip),
+		 ntohs(exp->tuple.src.u.tcp.port),
+		 NIPQUAD(exp->tuple.dst.u3.ip),
+		 ntohs(exp->tuple.dst.u.tcp.port));
 
 	return 0;
 }
@@ -566,8 +565,6 @@ static int __init init(void)
 	rcu_assign_pointer(nat_h245_hook, nat_h245);
 	rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
 	rcu_assign_pointer(nat_q931_hook, nat_q931);
-
-	DEBUGP("nf_nat_h323: init success\n");
 	return 0;
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index f3383fc..93d8a0a 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -26,13 +26,9 @@
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 
-#if 0
-#define DEBUGP printk
-#define DUMP_OFFSET(x)	printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
-#else
-#define DEBUGP(format, args...)
-#define DUMP_OFFSET(x)
-#endif
+#define DUMP_OFFSET(x) \
+	pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
+		 x->offset_before, x->offset_after, x->correction_pos);
 
 static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
 
@@ -47,15 +43,15 @@ adjust_tcp_sequence(u32 seq,
 	struct nf_nat_seq *this_way, *other_way;
 	struct nf_conn_nat *nat = nfct_nat(ct);
 
-	DEBUGP("nf_nat_resize_packet: old_size = %u, new_size = %u\n",
-		(*skb)->len, new_size);
+	pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
+		 seq, sizediff);
 
 	dir = CTINFO2DIR(ctinfo);
 
 	this_way = &nat->seq[dir];
 	other_way = &nat->seq[!dir];
 
-	DEBUGP("nf_nat_resize_packet: Seq_offset before: ");
+	pr_debug("nf_nat_resize_packet: Seq_offset before: ");
 	DUMP_OFFSET(this_way);
 
 	spin_lock_bh(&nf_nat_seqofs_lock);
@@ -72,7 +68,7 @@ adjust_tcp_sequence(u32 seq,
 	}
 	spin_unlock_bh(&nf_nat_seqofs_lock);
 
-	DEBUGP("nf_nat_resize_packet: Seq_offset after: ");
+	pr_debug("nf_nat_resize_packet: Seq_offset after: ");
 	DUMP_OFFSET(this_way);
 }
 
@@ -100,14 +96,12 @@ static void mangle_contents(struct sk_buff *skb,
 
 	/* update skb info */
 	if (rep_len > match_len) {
-		DEBUGP("nf_nat_mangle_packet: Extending packet by "
-		       "%u from %u bytes\n", rep_len - match_len,
-		       skb->len);
+		pr_debug("nf_nat_mangle_packet: Extending packet by "
+			 "%u from %u bytes\n", rep_len - match_len, skb->len);
 		skb_put(skb, rep_len - match_len);
 	} else {
-		DEBUGP("nf_nat_mangle_packet: Shrinking packet from "
-		       "%u from %u bytes\n", match_len - rep_len,
-		       skb->len);
+		pr_debug("nf_nat_mangle_packet: Shrinking packet from "
+			 "%u from %u bytes\n", match_len - rep_len, skb->len);
 		__skb_trim(skb, skb->len + rep_len - match_len);
 	}
 
@@ -320,9 +314,9 @@ sack_adjust(struct sk_buff *skb,
 			new_end_seq = htonl(ntohl(sack->end_seq)
 				      - natseq->offset_before);
 
-		DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
-			ntohl(sack->start_seq), new_start_seq,
-			ntohl(sack->end_seq), new_end_seq);
+		pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+			 ntohl(sack->start_seq), new_start_seq,
+			 ntohl(sack->end_seq), new_end_seq);
 
 		nf_proto_csum_replace4(&tcph->check, skb,
 				       sack->start_seq, new_start_seq, 0);
@@ -414,9 +408,9 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
 	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
 
-	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-		ntohl(newack));
+	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+		 ntohl(newack));
 
 	tcph->seq = newseq;
 	tcph->ack_seq = newack;
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index db7fbf6..bcf274b 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -22,12 +22,6 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_irc.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
 MODULE_DESCRIPTION("IRC (DCC) NAT helper");
 MODULE_LICENSE("GPL");
@@ -44,9 +38,6 @@ static unsigned int help(struct sk_buff **pskb,
 	u_int16_t port;
 	unsigned int ret;
 
-	DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
-	       expect->seq, exp_irc_info->len, ntohl(tcph->seq));
-
 	/* Reply comes from server. */
 	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
 	exp->dir = IP_CT_DIR_REPLY;
@@ -64,8 +55,8 @@ static unsigned int help(struct sk_buff **pskb,
 
 	ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip);
 	sprintf(buffer, "%u %u", ip, port);
-	DEBUGP("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n",
-	       buffer, NIPQUAD(ip), port);
+	pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n",
+		 buffer, NIPQUAD(ip), port);
 
 	ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
 				       matchoff, matchlen, buffer,
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index deb80ae..984ec83 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -37,14 +37,6 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
 MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
 MODULE_ALIAS("ip_nat_pptp");
 
-#if 0
-extern const char *pptp_msg_name[];
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
-				       __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static void pptp_nat_expected(struct nf_conn *ct,
 			      struct nf_conntrack_expect *exp)
 {
@@ -60,7 +52,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 
 	/* And here goes the grand finale of corrosion... */
 	if (exp->dir == IP_CT_DIR_ORIGINAL) {
-		DEBUGP("we are PNS->PAC\n");
+		pr_debug("we are PNS->PAC\n");
 		/* therefore, build tuple for PAC->PNS */
 		t.src.l3num = AF_INET;
 		t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
@@ -69,7 +61,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 		t.dst.u.gre.key = ct_pptp_info->pns_call_id;
 		t.dst.protonum = IPPROTO_GRE;
 	} else {
-		DEBUGP("we are PAC->PNS\n");
+		pr_debug("we are PAC->PNS\n");
 		/* build tuple for PNS->PAC */
 		t.src.l3num = AF_INET;
 		t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
@@ -79,15 +71,15 @@ static void pptp_nat_expected(struct nf_conn *ct,
 		t.dst.protonum = IPPROTO_GRE;
 	}
 
-	DEBUGP("trying to unexpect other dir: ");
+	pr_debug("trying to unexpect other dir: ");
 	NF_CT_DUMP_TUPLE(&t);
 	other_exp = nf_ct_expect_find_get(&t);
 	if (other_exp) {
 		nf_ct_unexpect_related(other_exp);
 		nf_ct_expect_put(other_exp);
-		DEBUGP("success\n");
+		pr_debug("success\n");
 	} else {
-		DEBUGP("not found!\n");
+		pr_debug("not found!\n");
 	}
 
 	/* This must be a fresh one. */
@@ -161,9 +153,9 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
 		break;
 	default:
-		DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
-		      (msg <= PPTP_MSG_MAX)?
-		      pptp_msg_name[msg]:pptp_msg_name[0]);
+		pr_debug("unknown outbound packet 0x%04x:%s\n", msg,
+			 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
+					       pptp_msg_name[0]);
 		/* fall through */
 	case PPTP_SET_LINK_INFO:
 		/* only need to NAT in case PAC is behind NAT box */
@@ -179,8 +171,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 
 	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
 	 * down to here */
-	DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-		ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
+	pr_debug("altering call id from 0x%04x to 0x%04x\n",
+		 ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
 
 	/* mangle packet */
 	if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
@@ -255,8 +247,9 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
 		break;
 	default:
-		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
-			pptp_msg_name[msg]:pptp_msg_name[0]);
+		pr_debug("unknown inbound packet %s\n",
+			 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
+					       pptp_msg_name[0]);
 		/* fall through */
 	case PPTP_START_SESSION_REQUEST:
 	case PPTP_START_SESSION_REPLY:
@@ -272,8 +265,8 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
 
 	/* mangle packet */
-	DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
-		ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
+	pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
+		 ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
 
 	if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
 				     pcid_off + sizeof(struct pptp_pkt_hdr) +
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index c3908bc..2e40cc8 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -36,13 +36,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
 MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
 
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
-				       __FUNCTION__, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
 /* is key in given range between min and max */
 static int
 gre_in_range(const struct nf_conntrack_tuple *tuple,
@@ -83,7 +76,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 		keyptr = &tuple->dst.u.gre.key;
 
 	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
-		DEBUGP("%p: NATing GRE PPTP\n", conntrack);
+		pr_debug("%p: NATing GRE PPTP\n", conntrack);
 		min = 1;
 		range_size = 0xffff;
 	} else {
@@ -91,7 +84,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.gre.key) - min + 1;
 	}
 
-	DEBUGP("min = %u, range_size = %u\n", min, range_size);
+	pr_debug("min = %u, range_size = %u\n", min, range_size);
 
 	for (i = 0; i < range_size; i++, key++) {
 		*keyptr = htons(min + key % range_size);
@@ -99,7 +92,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 			return 1;
 	}
 
-	DEBUGP("%p: no NAT mapping\n", conntrack);
+	pr_debug("%p: no NAT mapping\n", conntrack);
 	return 0;
 }
 
@@ -132,11 +125,11 @@ gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff,
 		 * Try to behave like "nf_nat_proto_unknown" */
 		break;
 	case GRE_VERSION_PPTP:
-		DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
+		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
 		pgreh->call_id = tuple->dst.u.gre.key;
 		break;
 	default:
-		DEBUGP("can't nat unknown GRE version\n");
+		pr_debug("can't nat unknown GRE version\n");
 		return 0;
 	}
 	return 1;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 080393a..0f45427 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -24,12 +24,6 @@
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_rule.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
 
 static struct
@@ -186,8 +180,8 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 	struct nf_nat_range range
 		= { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
 
-	DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n",
-	       ct, NIPQUAD(ip));
+	pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n",
+		 ct, NIPQUAD(ip));
 	return nf_nat_setup_info(ct, &range, hooknum);
 }
 
@@ -205,8 +199,8 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum)
 	struct nf_nat_range range
 		= { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
 
-	DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
-	       ct, NIPQUAD(ip));
+	pr_debug("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
+		 ct, NIPQUAD(ip));
 	return nf_nat_setup_info(ct, &range, hooknum);
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 940cdfc..a889ec3 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -26,12 +26,6 @@ MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
 MODULE_DESCRIPTION("SIP NAT helper");
 MODULE_ALIAS("ip_nat_sip");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 struct addr_map {
 	struct {
 		char		src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
@@ -257,8 +251,6 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
 	__be32 newip;
 	u_int16_t port;
 
-	DEBUGP("ip_nat_sdp():\n");
-
 	/* Connection will come from reply */
 	if (ct->tuplehash[dir].tuple.src.u3.ip ==
 	    ct->tuplehash[!dir].tuple.dst.u3.ip)
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 30eeaa4..332814d 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -27,12 +27,6 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 #ifdef CONFIG_XFRM
 static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
 {
@@ -117,7 +111,7 @@ nf_nat_fn(unsigned int hooknum,
 	if (!nat) {
 		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
 		if (nat == NULL) {
-			DEBUGP("failed to add NAT extension\n");
+			pr_debug("failed to add NAT extension\n");
 			return NF_ACCEPT;
 		}
 	}
@@ -154,9 +148,9 @@ nf_nat_fn(unsigned int hooknum,
 				return ret;
 			}
 		} else
-			DEBUGP("Already setup manip %s for ct %p\n",
-			       maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
-			       ct);
+			pr_debug("Already setup manip %s for ct %p\n",
+				 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
+				 ct);
 		break;
 
 	default:
@@ -270,7 +264,7 @@ nf_nat_adjust(unsigned int hooknum,
 
 	ct = nf_ct_get(*pskb, &ctinfo);
 	if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
-		DEBUGP("nf_nat_standalone: adjusting sequence number\n");
+		pr_debug("nf_nat_standalone: adjusting sequence number\n");
 		if (!nf_nat_seq_adjust(pskb, ct, ctinfo))
 			return NF_DROP;
 	}
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 540bf14..b05327e 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -32,12 +32,6 @@ struct in_device;
 #include <net/route.h>
 #include <linux/netfilter_ipv6/ip6t_LOG.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Use lock to serialize, so printks don't overlap */
 static DEFINE_SPINLOCK(log_lock);
 
@@ -466,12 +460,12 @@ static bool ip6t_log_checkentry(const char *tablename,
 	const struct ip6t_log_info *loginfo = targinfo;
 
 	if (loginfo->level >= 8) {
-		DEBUGP("LOG: level %u >= 8\n", loginfo->level);
+		pr_debug("LOG: level %u >= 8\n", loginfo->level);
 		return false;
 	}
 	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
-		DEBUGP("LOG: prefix term %i\n",
-		       loginfo->prefix[sizeof(loginfo->prefix)-1]);
+		pr_debug("LOG: prefix term %i\n",
+			 loginfo->prefix[sizeof(loginfo->prefix)-1]);
 		return false;
 	}
 	return true;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 14008dc..2f487cd 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -34,12 +34,6 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
 MODULE_DESCRIPTION("IP6 tables REJECT target module");
 MODULE_LICENSE("GPL");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Send RST reply */
 static void send_reset(struct sk_buff *oldskb)
 {
@@ -54,7 +48,7 @@ static void send_reset(struct sk_buff *oldskb)
 
 	if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
 	    (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
-		DEBUGP("ip6t_REJECT: addr is not unicast.\n");
+		pr_debug("ip6t_REJECT: addr is not unicast.\n");
 		return;
 	}
 
@@ -62,7 +56,7 @@ static void send_reset(struct sk_buff *oldskb)
 	tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);
 
 	if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
-		DEBUGP("ip6t_REJECT: Can't get TCP header.\n");
+		pr_debug("ip6t_REJECT: Can't get TCP header.\n");
 		return;
 	}
 
@@ -70,8 +64,9 @@ static void send_reset(struct sk_buff *oldskb)
 
 	/* IP header checks: fragment, too short. */
 	if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
-		DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n",
-			proto, otcplen);
+		pr_debug("ip6t_REJECT: proto(%d) != IPPROTO_TCP, "
+			 "or too short. otcplen = %d\n",
+			 proto, otcplen);
 		return;
 	}
 
@@ -80,14 +75,14 @@ static void send_reset(struct sk_buff *oldskb)
 
 	/* No RST for RST. */
 	if (otcph.rst) {
-		DEBUGP("ip6t_REJECT: RST is set\n");
+		pr_debug("ip6t_REJECT: RST is set\n");
 		return;
 	}
 
 	/* Check checksum. */
 	if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
 			    skb_checksum(oldskb, tcphoff, otcplen, 0))) {
-		DEBUGP("ip6t_REJECT: TCP checksum is invalid\n");
+		pr_debug("ip6t_REJECT: TCP checksum is invalid\n");
 		return;
 	}
 
@@ -186,7 +181,7 @@ static unsigned int reject6_target(struct sk_buff **pskb,
 {
 	const struct ip6t_reject_info *reject = targinfo;
 
-	DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__);
+	pr_debug("%s: medium point\n", __FUNCTION__);
 	/* WARNING: This code causes reentry within ip6tables.
 	   This means that the ip6tables jump stack is now crap.  We
 	   must return an absolute verdict. --RR */
@@ -237,7 +232,7 @@ static bool check(const char *tablename,
 		/* Must specify that it's a TCP packet */
 		if (e->ipv6.proto != IPPROTO_TCP
 		    || (e->ipv6.invflags & XT_INV_PROTO)) {
-			DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n");
+			printk("ip6t_REJECT: TCP_RESET illegal for non-tcp\n");
 			return false;
 		}
 	}
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index a9fe2aa..2a25fe25 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -23,21 +23,16 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("IPv6 AH match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
 static inline bool
 spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
 	bool r;
-	DEBUGP("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
-	       min,spi,max);
+
+	pr_debug("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",
+		 invert ? '!' : ' ', min, spi, max);
 	r = (spi >= min && spi <= max) ^ invert;
-	DEBUGP(" result %s\n",r? "PASS\n" : "FAILED\n");
+	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
 	return r;
 }
 
@@ -73,22 +68,22 @@ match(const struct sk_buff *skb,
 
 	hdrlen = (ah->hdrlen + 2) << 2;
 
-	DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
-	DEBUGP("RES %04X ", ah->reserved);
-	DEBUGP("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi));
-
-	DEBUGP("IPv6 AH spi %02X ",
-	       spi_match(ahinfo->spis[0], ahinfo->spis[1],
-			 ntohl(ah->spi),
-			 !!(ahinfo->invflags & IP6T_AH_INV_SPI)));
-	DEBUGP("len %02X %04X %02X ",
-	       ahinfo->hdrlen, hdrlen,
-	       (!ahinfo->hdrlen ||
-		(ahinfo->hdrlen == hdrlen) ^
-		!!(ahinfo->invflags & IP6T_AH_INV_LEN)));
-	DEBUGP("res %02X %04X %02X\n",
-	       ahinfo->hdrres, ah->reserved,
-	       !(ahinfo->hdrres && ah->reserved));
+	pr_debug("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
+	pr_debug("RES %04X ", ah->reserved);
+	pr_debug("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi));
+
+	pr_debug("IPv6 AH spi %02X ",
+		 spi_match(ahinfo->spis[0], ahinfo->spis[1],
+			   ntohl(ah->spi),
+			   !!(ahinfo->invflags & IP6T_AH_INV_SPI)));
+	pr_debug("len %02X %04X %02X ",
+		 ahinfo->hdrlen, hdrlen,
+		 (!ahinfo->hdrlen ||
+		  (ahinfo->hdrlen == hdrlen) ^
+		  !!(ahinfo->invflags & IP6T_AH_INV_LEN)));
+	pr_debug("res %02X %04X %02X\n",
+		 ahinfo->hdrres, ah->reserved,
+		 !(ahinfo->hdrres && ah->reserved));
 
 	return (ah != NULL)
 	       &&
@@ -114,7 +109,7 @@ checkentry(const char *tablename,
 	const struct ip6t_ah *ahinfo = matchinfo;
 
 	if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
-		DEBUGP("ip6t_ah: unknown flags %X\n", ahinfo->invflags);
+		pr_debug("ip6t_ah: unknown flags %X\n", ahinfo->invflags);
 		return false;
 	}
 	return true;
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index bb1cfa8..968aeba 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -22,21 +22,15 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("IPv6 FRAG match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Returns 1 if the id is matched by the range, 0 otherwise */
 static inline bool
 id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 {
 	bool r;
-	DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ',
-	       min, id, max);
+	pr_debug("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ',
+		 min, id, max);
 	r = (id >= min && id <= max) ^ invert;
-	DEBUGP(" result %s\n", r ? "PASS" : "FAILED");
+	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
 	return r;
 }
 
@@ -69,37 +63,37 @@ match(const struct sk_buff *skb,
 		return false;
 	}
 
-	DEBUGP("INFO %04X ", fh->frag_off);
-	DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
-	DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6);
-	DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF));
-	DEBUGP("ID %u %08X\n", ntohl(fh->identification),
-	       ntohl(fh->identification));
-
-	DEBUGP("IPv6 FRAG id %02X ",
-	       id_match(fraginfo->ids[0], fraginfo->ids[1],
-			 ntohl(fh->identification),
-			 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)));
-	DEBUGP("res %02X %02X%04X %02X ",
-	       fraginfo->flags & IP6T_FRAG_RES, fh->reserved,
-	       ntohs(fh->frag_off) & 0x6,
-	       !((fraginfo->flags & IP6T_FRAG_RES)
-		 && (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
-	DEBUGP("first %02X %02X %02X ",
-	       fraginfo->flags & IP6T_FRAG_FST,
-	       ntohs(fh->frag_off) & ~0x7,
-	       !((fraginfo->flags & IP6T_FRAG_FST)
-		 && (ntohs(fh->frag_off) & ~0x7)));
-	DEBUGP("mf %02X %02X %02X ",
-	       fraginfo->flags & IP6T_FRAG_MF,
-	       ntohs(fh->frag_off) & IP6_MF,
-	       !((fraginfo->flags & IP6T_FRAG_MF)
-		 && !((ntohs(fh->frag_off) & IP6_MF))));
-	DEBUGP("last %02X %02X %02X\n",
-	       fraginfo->flags & IP6T_FRAG_NMF,
-	       ntohs(fh->frag_off) & IP6_MF,
-	       !((fraginfo->flags & IP6T_FRAG_NMF)
-		 && (ntohs(fh->frag_off) & IP6_MF)));
+	pr_debug("INFO %04X ", fh->frag_off);
+	pr_debug("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
+	pr_debug("RES %02X %04X ", fh->reserved, ntohs(fh->frag_off) & 0x6);
+	pr_debug("MF %04X ", fh->frag_off & htons(IP6_MF));
+	pr_debug("ID %u %08X\n", ntohl(fh->identification),
+		 ntohl(fh->identification));
+
+	pr_debug("IPv6 FRAG id %02X ",
+		 id_match(fraginfo->ids[0], fraginfo->ids[1],
+			  ntohl(fh->identification),
+			  !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)));
+	pr_debug("res %02X %02X%04X %02X ",
+		 fraginfo->flags & IP6T_FRAG_RES, fh->reserved,
+		 ntohs(fh->frag_off) & 0x6,
+		 !((fraginfo->flags & IP6T_FRAG_RES)
+		   && (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
+	pr_debug("first %02X %02X %02X ",
+		 fraginfo->flags & IP6T_FRAG_FST,
+		 ntohs(fh->frag_off) & ~0x7,
+		 !((fraginfo->flags & IP6T_FRAG_FST)
+		   && (ntohs(fh->frag_off) & ~0x7)));
+	pr_debug("mf %02X %02X %02X ",
+		 fraginfo->flags & IP6T_FRAG_MF,
+		 ntohs(fh->frag_off) & IP6_MF,
+		 !((fraginfo->flags & IP6T_FRAG_MF)
+		   && !((ntohs(fh->frag_off) & IP6_MF))));
+	pr_debug("last %02X %02X %02X\n",
+		 fraginfo->flags & IP6T_FRAG_NMF,
+		 ntohs(fh->frag_off) & IP6_MF,
+		 !((fraginfo->flags & IP6T_FRAG_NMF)
+		   && (ntohs(fh->frag_off) & IP6_MF)));
 
 	return (fh != NULL)
 	       &&
@@ -131,7 +125,7 @@ checkentry(const char *tablename,
 	const struct ip6t_frag *fraginfo = matchinfo;
 
 	if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
-		DEBUGP("ip6t_frag: unknown flags %X\n", fraginfo->invflags);
+		pr_debug("ip6t_frag: unknown flags %X\n", fraginfo->invflags);
 		return false;
 	}
 	return true;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 6247d4c..e6ca601 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -25,12 +25,6 @@ MODULE_DESCRIPTION("IPv6 opts match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 MODULE_ALIAS("ip6t_dst");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /*
  *  (Type & 0xC0) >> 6
  *	0	-> ignorable
@@ -90,13 +84,13 @@ match(const struct sk_buff *skb,
 		return false;
 	}
 
-	DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
+	pr_debug("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
 
-	DEBUGP("len %02X %04X %02X ",
-	       optinfo->hdrlen, hdrlen,
-	       (!(optinfo->flags & IP6T_OPTS_LEN) ||
-		((optinfo->hdrlen == hdrlen) ^
-		 !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
+	pr_debug("len %02X %04X %02X ",
+		 optinfo->hdrlen, hdrlen,
+		 (!(optinfo->flags & IP6T_OPTS_LEN) ||
+		  ((optinfo->hdrlen == hdrlen) ^
+		   !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
 
 	ret = (oh != NULL) &&
 	      (!(optinfo->flags & IP6T_OPTS_LEN) ||
@@ -108,10 +102,10 @@ match(const struct sk_buff *skb,
 	if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
 		return ret;
 	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
-		DEBUGP("Not strict - not implemented");
+		pr_debug("Not strict - not implemented");
 	} else {
-		DEBUGP("Strict ");
-		DEBUGP("#%d ", optinfo->optsnr);
+		pr_debug("Strict ");
+		pr_debug("#%d ", optinfo->optsnr);
 		for (temp = 0; temp < optinfo->optsnr; temp++) {
 			/* type field exists ? */
 			if (hdrlen < 1)
@@ -123,12 +117,11 @@ match(const struct sk_buff *skb,
 
 			/* Type check */
 			if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) {
-				DEBUGP("Tbad %02X %02X\n",
-				       *tp,
-				       (optinfo->opts[temp] & 0xFF00) >> 8);
+				pr_debug("Tbad %02X %02X\n", *tp,
+					 (optinfo->opts[temp] & 0xFF00) >> 8);
 				return false;
 			} else {
-				DEBUGP("Tok ");
+				pr_debug("Tok ");
 			}
 			/* Length check */
 			if (*tp) {
@@ -145,23 +138,23 @@ match(const struct sk_buff *skb,
 				spec_len = optinfo->opts[temp] & 0x00FF;
 
 				if (spec_len != 0x00FF && spec_len != *lp) {
-					DEBUGP("Lbad %02X %04X\n", *lp,
-					       spec_len);
+					pr_debug("Lbad %02X %04X\n", *lp,
+						 spec_len);
 					return false;
 				}
-				DEBUGP("Lok ");
+				pr_debug("Lok ");
 				optlen = *lp + 2;
 			} else {
-				DEBUGP("Pad1\n");
+				pr_debug("Pad1\n");
 				optlen = 1;
 			}
 
 			/* Step to the next */
-			DEBUGP("len%04X \n", optlen);
+			pr_debug("len%04X \n", optlen);
 
 			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
 			    temp < optinfo->optsnr - 1) {
-				DEBUGP("new pointer is too large! \n");
+				pr_debug("new pointer is too large! \n");
 				break;
 			}
 			ptr += optlen;
@@ -187,7 +180,7 @@ checkentry(const char *tablename,
 	const struct ip6t_opts *optsinfo = matchinfo;
 
 	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
-		DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
+		pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
 		return false;
 	}
 	return true;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 549deea..357cea7 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -24,21 +24,15 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("IPv6 RT match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Returns 1 if the id is matched by the range, 0 otherwise */
 static inline bool
 segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 {
 	bool r;
-	DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",
-	       invert ? '!' : ' ', min, id, max);
+	pr_debug("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",
+		 invert ? '!' : ' ', min, id, max);
 	r = (id >= min && id <= max) ^ invert;
-	DEBUGP(" result %s\n", r ? "PASS" : "FAILED");
+	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
 	return r;
 }
 
@@ -82,29 +76,29 @@ match(const struct sk_buff *skb,
 		return false;
 	}
 
-	DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
-	DEBUGP("TYPE %04X ", rh->type);
-	DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
-
-	DEBUGP("IPv6 RT segsleft %02X ",
-	       segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
-			      rh->segments_left,
-			      !!(rtinfo->invflags & IP6T_RT_INV_SGS)));
-	DEBUGP("type %02X %02X %02X ",
-	       rtinfo->rt_type, rh->type,
-	       (!(rtinfo->flags & IP6T_RT_TYP) ||
-		((rtinfo->rt_type == rh->type) ^
-		 !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
-	DEBUGP("len %02X %04X %02X ",
-	       rtinfo->hdrlen, hdrlen,
-	       !(rtinfo->flags & IP6T_RT_LEN) ||
-		((rtinfo->hdrlen == hdrlen) ^
-		 !!(rtinfo->invflags & IP6T_RT_INV_LEN)));
-	DEBUGP("res %02X %02X %02X ",
-	       rtinfo->flags & IP6T_RT_RES,
-	       ((const struct rt0_hdr *)rh)->reserved,
-	       !((rtinfo->flags & IP6T_RT_RES) &&
-		 (((const struct rt0_hdr *)rh)->reserved)));
+	pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
+	pr_debug("TYPE %04X ", rh->type);
+	pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
+
+	pr_debug("IPv6 RT segsleft %02X ",
+		 segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+				rh->segments_left,
+				!!(rtinfo->invflags & IP6T_RT_INV_SGS)));
+	pr_debug("type %02X %02X %02X ",
+		 rtinfo->rt_type, rh->type,
+		 (!(rtinfo->flags & IP6T_RT_TYP) ||
+		  ((rtinfo->rt_type == rh->type) ^
+		   !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
+	pr_debug("len %02X %04X %02X ",
+		 rtinfo->hdrlen, hdrlen,
+		 !(rtinfo->flags & IP6T_RT_LEN) ||
+		  ((rtinfo->hdrlen == hdrlen) ^
+		   !!(rtinfo->invflags & IP6T_RT_INV_LEN)));
+	pr_debug("res %02X %02X %02X ",
+		 rtinfo->flags & IP6T_RT_RES,
+		 ((const struct rt0_hdr *)rh)->reserved,
+		 !((rtinfo->flags & IP6T_RT_RES) &&
+		   (((const struct rt0_hdr *)rh)->reserved)));
 
 	ret = (rh != NULL)
 	      &&
@@ -131,18 +125,18 @@ match(const struct sk_buff *skb,
 		ret = (*rp == 0);
 	}
 
-	DEBUGP("#%d ", rtinfo->addrnr);
+	pr_debug("#%d ", rtinfo->addrnr);
 	if (!(rtinfo->flags & IP6T_RT_FST)) {
 		return ret;
 	} else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) {
-		DEBUGP("Not strict ");
+		pr_debug("Not strict ");
 		if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
-			DEBUGP("There isn't enough space\n");
+			pr_debug("There isn't enough space\n");
 			return false;
 		} else {
 			unsigned int i = 0;
 
-			DEBUGP("#%d ", rtinfo->addrnr);
+			pr_debug("#%d ", rtinfo->addrnr);
 			for (temp = 0;
 			     temp < (unsigned int)((hdrlen - 8) / 16);
 			     temp++) {
@@ -156,25 +150,25 @@ match(const struct sk_buff *skb,
 				BUG_ON(ap == NULL);
 
 				if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
-					DEBUGP("i=%d temp=%d;\n", i, temp);
+					pr_debug("i=%d temp=%d;\n", i, temp);
 					i++;
 				}
 				if (i == rtinfo->addrnr)
 					break;
 			}
-			DEBUGP("i=%d #%d\n", i, rtinfo->addrnr);
+			pr_debug("i=%d #%d\n", i, rtinfo->addrnr);
 			if (i == rtinfo->addrnr)
 				return ret;
 			else
 				return false;
 		}
 	} else {
-		DEBUGP("Strict ");
+		pr_debug("Strict ");
 		if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
-			DEBUGP("There isn't enough space\n");
+			pr_debug("There isn't enough space\n");
 			return false;
 		} else {
-			DEBUGP("#%d ", rtinfo->addrnr);
+			pr_debug("#%d ", rtinfo->addrnr);
 			for (temp = 0; temp < rtinfo->addrnr; temp++) {
 				ap = skb_header_pointer(skb,
 							ptr
@@ -187,7 +181,7 @@ match(const struct sk_buff *skb,
 				if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
 					break;
 			}
-			DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr);
+			pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr);
 			if (temp == rtinfo->addrnr &&
 			    temp == (unsigned int)((hdrlen - 8) / 16))
 				return ret;
@@ -210,14 +204,14 @@ checkentry(const char *tablename,
 	const struct ip6t_rt *rtinfo = matchinfo;
 
 	if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
-		DEBUGP("ip6t_rt: unknown flags %X\n", rtinfo->invflags);
+		pr_debug("ip6t_rt: unknown flags %X\n", rtinfo->invflags);
 		return false;
 	}
 	if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
 	    (!(rtinfo->flags & IP6T_RT_TYP) ||
 	     (rtinfo->rt_type != 0) ||
 	     (rtinfo->invflags & IP6T_RT_INV_TYP))) {
-		DEBUGP("`--rt-type 0' required before `--rt-0-*'");
+		pr_debug("`--rt-type 0' required before `--rt-0-*'\n");
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index f2d2649..f0a9efa 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -21,12 +21,6 @@ MODULE_DESCRIPTION("ip6tables mangle table");
 			    (1 << NF_IP6_LOCAL_OUT) | \
 			    (1 << NF_IP6_POST_ROUTING))
 
-#if 0
-#define DEBUGP(x, args...)	printk(KERN_DEBUG x, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
 static struct
 {
 	struct ip6t_replace repl;
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 0acda45..ec290e4 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -8,12 +8,6 @@
 
 #define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT))
 
-#if 0
-#define DEBUGP(x, args...)	printk(KERN_DEBUG x, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
 static struct
 {
 	struct ip6t_replace repl;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 747b01e..89e20ab 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -26,12 +26,6 @@
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
 			     struct nf_conntrack_tuple *tuple)
 {
@@ -136,7 +130,7 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 	 * except of IPv6 & ext headers. but it's tracked anyway. - YK
 	 */
 	if ((protoff < 0) || (protoff > (*pskb)->len)) {
-		DEBUGP("ip6_conntrack_core: can't find proto in pkt\n");
+		pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
 		NF_CT_STAT_INC_ATOMIC(error);
 		NF_CT_STAT_INC_ATOMIC(invalid);
 		return -NF_ACCEPT;
@@ -178,7 +172,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 	protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
 					 (*pskb)->len - extoff);
 	if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) {
-		DEBUGP("proto header not found\n");
+		pr_debug("proto header not found\n");
 		return NF_ACCEPT;
 	}
 
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a514661..9defc7e 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -27,12 +27,6 @@
 
 static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
 			       unsigned int dataoff,
 			       struct nf_conntrack_tuple *tuple)
@@ -125,8 +119,8 @@ static int icmpv6_new(struct nf_conn *conntrack,
 
 	if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
 		/* Can't create a new ICMPv6 `conn' with this. */
-		DEBUGP("icmpv6: can't create new conn with type %u\n",
-		       type + 128);
+		pr_debug("icmpv6: can't create new conn with type %u\n",
+			 type + 128);
 		NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
 		return 0;
 	}
@@ -152,14 +146,15 @@ icmpv6_error_message(struct sk_buff *skb,
 
 	hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr);
 	if (hp == NULL) {
-		DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n");
+		pr_debug("icmpv6_error: Can't get ICMPv6 hdr.\n");
 		return -NF_ACCEPT;
 	}
 
 	inip6off = icmp6off + sizeof(_hdr);
 	if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr),
 			  &inprotonum, sizeof(inprotonum)) != 0) {
-		DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n");
+		pr_debug("icmpv6_error: Can't get nexthdr in inner IPv6 "
+			 "header.\n");
 		return -NF_ACCEPT;
 	}
 	inprotoff = nf_ct_ipv6_skip_exthdr(skb,
@@ -169,7 +164,8 @@ icmpv6_error_message(struct sk_buff *skb,
 						    - sizeof(struct ipv6hdr));
 
 	if ((inprotoff > skb->len) || (inprotonum == NEXTHDR_FRAGMENT)) {
-		DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n");
+		pr_debug("icmpv6_error: Can't get protocol header in ICMPv6 "
+			 "payload.\n");
 		return -NF_ACCEPT;
 	}
 
@@ -179,7 +175,7 @@ icmpv6_error_message(struct sk_buff *skb,
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
 			     &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) {
-		DEBUGP("icmpv6_error: Can't get tuple\n");
+		pr_debug("icmpv6_error: Can't get tuple\n");
 		return -NF_ACCEPT;
 	}
 
@@ -187,7 +183,7 @@ icmpv6_error_message(struct sk_buff *skb,
 	   been preserved inside the ICMP. */
 	if (!nf_ct_invert_tuple(&intuple, &origtuple,
 				&nf_conntrack_l3proto_ipv6, inproto)) {
-		DEBUGP("icmpv6_error: Can't invert tuple\n");
+		pr_debug("icmpv6_error: Can't invert tuple\n");
 		return -NF_ACCEPT;
 	}
 
@@ -195,7 +191,7 @@ icmpv6_error_message(struct sk_buff *skb,
 
 	h = nf_conntrack_find_get(&intuple);
 	if (!h) {
-		DEBUGP("icmpv6_error: no match\n");
+		pr_debug("icmpv6_error: no match\n");
 		return -NF_ACCEPT;
 	} else {
 		if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 347ab76..25442a8 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -44,12 +44,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 #define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */
 #define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
 #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
@@ -343,7 +337,7 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,				   str
 	struct nf_ct_frag6_queue *fq;
 
 	if ((fq = frag_alloc_queue()) == NULL) {
-		DEBUGP("Can't alloc new queue\n");
+		pr_debug("Can't alloc new queue\n");
 		goto oom;
 	}
 
@@ -393,7 +387,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 	int offset, end;
 
 	if (fq->last_in & COMPLETE) {
-		DEBUGP("Allready completed\n");
+		pr_debug("Allready completed\n");
 		goto err;
 	}
 
@@ -402,7 +396,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
-		DEBUGP("offset is too large.\n");
+		pr_debug("offset is too large.\n");
 		return -1;
 	}
 
@@ -420,7 +414,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		 */
 		if (end < fq->len ||
 		    ((fq->last_in & LAST_IN) && end != fq->len)) {
-			DEBUGP("already received last fragment\n");
+			pr_debug("already received last fragment\n");
 			goto err;
 		}
 		fq->last_in |= LAST_IN;
@@ -433,13 +427,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 			/* RFC2460 says always send parameter problem in
 			 * this case. -DaveM
 			 */
-			DEBUGP("the end of this fragment is not rounded to 8 bytes.\n");
+			pr_debug("end of fragment not rounded to 8 bytes.\n");
 			return -1;
 		}
 		if (end > fq->len) {
 			/* Some bits beyond end -> corruption. */
 			if (fq->last_in & LAST_IN) {
-				DEBUGP("last packet already reached.\n");
+				pr_debug("last packet already reached.\n");
 				goto err;
 			}
 			fq->len = end;
@@ -451,11 +445,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 
 	/* Point into the IP datagram 'data' part. */
 	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
-		DEBUGP("queue: message is too short.\n");
+		pr_debug("queue: message is too short.\n");
 		goto err;
 	}
 	if (pskb_trim_rcsum(skb, end - offset)) {
-		DEBUGP("Can't trim\n");
+		pr_debug("Can't trim\n");
 		goto err;
 	}
 
@@ -480,11 +474,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		if (i > 0) {
 			offset += i;
 			if (end <= offset) {
-				DEBUGP("overlap\n");
+				pr_debug("overlap\n");
 				goto err;
 			}
 			if (!pskb_pull(skb, i)) {
-				DEBUGP("Can't pull\n");
+				pr_debug("Can't pull\n");
 				goto err;
 			}
 			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
@@ -503,7 +497,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 			/* Eat head of the next overlapped fragment
 			 * and leave the loop. The next ones cannot overlap.
 			 */
-			DEBUGP("Eat head of the overlapped parts.: %d", i);
+			pr_debug("Eat head of the overlapped parts.: %d", i);
 			if (!pskb_pull(next, i))
 				goto err;
 
@@ -586,13 +580,13 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		       sizeof(struct ipv6hdr) + fq->len -
 		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN) {
-		DEBUGP("payload len is too large.\n");
+		pr_debug("payload len is too large.\n");
 		goto out_oversize;
 	}
 
 	/* Head of list must not be cloned. */
 	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
-		DEBUGP("skb is cloned but can't expand head");
+		pr_debug("skb is cloned but can't expand head");
 		goto out_oom;
 	}
 
@@ -604,7 +598,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		int i, plen = 0;
 
 		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
-			DEBUGP("Can't alloc skb\n");
+			pr_debug("Can't alloc skb\n");
 			goto out_oom;
 		}
 		clone->next = head->next;
@@ -719,11 +713,11 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
 			return -1;
 		}
 		if (len < (int)sizeof(struct ipv6_opt_hdr)) {
-			DEBUGP("too short\n");
+			pr_debug("too short\n");
 			return -1;
 		}
 		if (nexthdr == NEXTHDR_NONE) {
-			DEBUGP("next header is none\n");
+			pr_debug("next header is none\n");
 			return -1;
 		}
 		if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
@@ -764,7 +758,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 
 	/* Jumbo payload inhibits frag. header */
 	if (ipv6_hdr(skb)->payload_len == 0) {
-		DEBUGP("payload len = 0\n");
+		pr_debug("payload len = 0\n");
 		return skb;
 	}
 
@@ -773,14 +767,14 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 
 	clone = skb_clone(skb, GFP_ATOMIC);
 	if (clone == NULL) {
-		DEBUGP("Can't clone skb\n");
+		pr_debug("Can't clone skb\n");
 		return skb;
 	}
 
 	NFCT_FRAG6_CB(clone)->orig = skb;
 
 	if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
-		DEBUGP("message is too short.\n");
+		pr_debug("message is too short.\n");
 		goto ret_orig;
 	}
 
@@ -789,7 +783,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 	fhdr = (struct frag_hdr *)skb_transport_header(clone);
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
-		DEBUGP("Invalid fragment offset\n");
+		pr_debug("Invalid fragment offset\n");
 		/* It is not a fragmented frame */
 		goto ret_orig;
 	}
@@ -799,7 +793,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 
 	fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
 	if (fq == NULL) {
-		DEBUGP("Can't find and can't create new queue\n");
+		pr_debug("Can't find and can't create new queue\n");
 		goto ret_orig;
 	}
 
@@ -807,7 +801,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 
 	if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
 		spin_unlock(&fq->lock);
-		DEBUGP("Can't insert skb to queue\n");
+		pr_debug("Can't insert skb to queue\n");
 		fq_put(fq, NULL);
 		goto ret_orig;
 	}
@@ -815,7 +809,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 	if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
 		ret_skb = nf_ct_frag6_reasm(fq, dev);
 		if (ret_skb == NULL)
-			DEBUGP("Can't reassemble fragmented packets\n");
+			pr_debug("Can't reassemble fragmented packets\n");
 	}
 	spin_unlock(&fq->lock);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 472396d..3d14110 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -40,12 +40,6 @@
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 DEFINE_RWLOCK(nf_conntrack_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
@@ -141,7 +135,7 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
 static void
 clean_from_lists(struct nf_conn *ct)
 {
-	DEBUGP("clean_from_lists(%p)\n", ct);
+	pr_debug("clean_from_lists(%p)\n", ct);
 	hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
 	hlist_del(&ct->tuplehash[IP_CT_DIR_REPLY].hnode);
 
@@ -155,7 +149,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	struct nf_conn *ct = (struct nf_conn *)nfct;
 	struct nf_conntrack_l4proto *l4proto;
 
-	DEBUGP("destroy_conntrack(%p)\n", ct);
+	pr_debug("destroy_conntrack(%p)\n", ct);
 	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
 	NF_CT_ASSERT(!timer_pending(&ct->timeout));
 
@@ -194,7 +188,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	if (ct->master)
 		nf_ct_put(ct->master);
 
-	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
+	pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
 	nf_conntrack_free(ct);
 }
 
@@ -313,7 +307,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
 	/* No external references means noone else could have
 	   confirmed us. */
 	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
-	DEBUGP("Confirming conntrack %p\n", ct);
+	pr_debug("Confirming conntrack %p\n", ct);
 
 	write_lock_bh(&nf_conntrack_lock);
 
@@ -446,7 +440,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 
 	conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC);
 	if (conntrack == NULL) {
-		DEBUGP("nf_conntrack_alloc: Can't alloc conntrack.\n");
+		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
 		atomic_dec(&nf_conntrack_count);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -485,27 +479,27 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 	struct nf_conntrack_expect *exp;
 
 	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
-		DEBUGP("Can't invert tuple.\n");
+		pr_debug("Can't invert tuple.\n");
 		return NULL;
 	}
 
 	conntrack = nf_conntrack_alloc(tuple, &repl_tuple);
 	if (conntrack == NULL || IS_ERR(conntrack)) {
-		DEBUGP("Can't allocate conntrack.\n");
+		pr_debug("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)conntrack;
 	}
 
 	if (!l4proto->new(conntrack, skb, dataoff)) {
 		nf_conntrack_free(conntrack);
-		DEBUGP("init conntrack: can't track with proto module\n");
+		pr_debug("init conntrack: can't track with proto module\n");
 		return NULL;
 	}
 
 	write_lock_bh(&nf_conntrack_lock);
 	exp = nf_ct_find_expectation(tuple);
 	if (exp) {
-		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
-			conntrack, exp);
+		pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
+			 conntrack, exp);
 		/* Welcome, Mr. Bond.  We've been expecting you... */
 		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
 		conntrack->master = exp->master;
@@ -568,7 +562,7 @@ resolve_normal_ct(struct sk_buff *skb,
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
 			     dataoff, l3num, protonum, &tuple, l3proto,
 			     l4proto)) {
-		DEBUGP("resolve_normal_ct: Can't get tuple\n");
+		pr_debug("resolve_normal_ct: Can't get tuple\n");
 		return NULL;
 	}
 
@@ -591,13 +585,14 @@ resolve_normal_ct(struct sk_buff *skb,
 	} else {
 		/* Once we've had two way comms, always ESTABLISHED. */
 		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
-			DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
+			pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
 			*ctinfo = IP_CT_ESTABLISHED;
 		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
-			DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
+			pr_debug("nf_conntrack_in: related packet for %p\n",
+				 ct);
 			*ctinfo = IP_CT_RELATED;
 		} else {
-			DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
+			pr_debug("nf_conntrack_in: new packet for %p\n", ct);
 			*ctinfo = IP_CT_NEW;
 		}
 		*set_reply = 0;
@@ -629,7 +624,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
 	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
 
 	if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
-		DEBUGP("not prepared to track yet or error occured\n");
+		pr_debug("not prepared to track yet or error occured\n");
 		return -ret;
 	}
 
@@ -665,7 +660,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
 	if (ret < 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
-		DEBUGP("nf_conntrack_in: Can't track with proto module\n");
+		pr_debug("nf_conntrack_in: Can't track with proto module\n");
 		nf_conntrack_put((*pskb)->nfct);
 		(*pskb)->nfct = NULL;
 		NF_CT_STAT_INC_ATOMIC(invalid);
@@ -706,7 +701,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 	/* Should be unconfirmed, so not in hash table yet */
 	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
 
-	DEBUGP("Altering reply tuple of %p to ", ct);
+	pr_debug("Altering reply tuple of %p to ", ct);
 	NF_CT_DUMP_TUPLE(newreply);
 
 	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index cd9c2d0..c763ee7 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -51,12 +51,6 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
 				struct nf_conntrack_expect *exp);
 EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char);
 static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char);
 static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
@@ -138,13 +132,13 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[],
 			if (*data == term && i == array_size - 1)
 				return len;
 
-			DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
-			       len, i, *data);
+			pr_debug("Char %u (got %u nums) `%u' unexpected\n",
+				 len, i, *data);
 			return 0;
 		}
 	}
-	DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
-
+	pr_debug("Failed to fill %u numbers separated by %c\n",
+		 array_size, sep);
 	return 0;
 }
 
@@ -178,13 +172,13 @@ static int get_port(const char *data, int start, size_t dlen, char delim,
 			if (tmp_port == 0)
 				break;
 			*port = htons(tmp_port);
-			DEBUGP("get_port: return %d\n", tmp_port);
+			pr_debug("get_port: return %d\n", tmp_port);
 			return i + 1;
 		}
 		else if (data[i] >= '0' && data[i] <= '9')
 			tmp_port = tmp_port*10 + data[i] - '0';
 		else { /* Some other crap */
-			DEBUGP("get_port: invalid char.\n");
+			pr_debug("get_port: invalid char.\n");
 			break;
 		}
 	}
@@ -201,22 +195,22 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
 	/* First character is delimiter, then "1" for IPv4 or "2" for IPv6,
 	   then delimiter again. */
 	if (dlen <= 3) {
-		DEBUGP("EPRT: too short\n");
+		pr_debug("EPRT: too short\n");
 		return 0;
 	}
 	delim = data[0];
 	if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
-		DEBUGP("try_eprt: invalid delimitter.\n");
+		pr_debug("try_eprt: invalid delimitter.\n");
 		return 0;
 	}
 
 	if ((cmd->l3num == PF_INET && data[1] != '1') ||
 	    (cmd->l3num == PF_INET6 && data[1] != '2')) {
-		DEBUGP("EPRT: invalid protocol number.\n");
+		pr_debug("EPRT: invalid protocol number.\n");
 		return 0;
 	}
 
-	DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim);
+	pr_debug("EPRT: Got %c%c%c\n", delim, data[1], delim);
 
 	if (data[1] == '1') {
 		u_int32_t array[4];
@@ -234,7 +228,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
 
 	if (length == 0)
 		return 0;
-	DEBUGP("EPRT: Got IP address!\n");
+	pr_debug("EPRT: Got IP address!\n");
 	/* Start offset includes initial "|1|", and trailing delimiter */
 	return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port);
 }
@@ -267,7 +261,7 @@ static int find_pattern(const char *data, size_t dlen,
 {
 	size_t i;
 
-	DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
+	pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen);
 	if (dlen == 0)
 		return 0;
 
@@ -282,17 +276,17 @@ static int find_pattern(const char *data, size_t dlen,
 #if 0
 		size_t i;
 
-		DEBUGP("ftp: string mismatch\n");
+		pr_debug("ftp: string mismatch\n");
 		for (i = 0; i < plen; i++) {
-			DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
-				i, data[i], data[i],
-				pattern[i], pattern[i]);
+			pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
+				 i, data[i], data[i],
+				 pattern[i], pattern[i]);
 		}
 #endif
 		return 0;
 	}
 
-	DEBUGP("Pattern matches!\n");
+	pr_debug("Pattern matches!\n");
 	/* Now we've found the constant string, try to skip
 	   to the 'skip' character */
 	for (i = plen; data[i] != skip; i++)
@@ -301,14 +295,14 @@ static int find_pattern(const char *data, size_t dlen,
 	/* Skip over the last character */
 	i++;
 
-	DEBUGP("Skipped up to `%c'!\n", skip);
+	pr_debug("Skipped up to `%c'!\n", skip);
 
 	*numoff = i;
 	*numlen = getnum(data + i, dlen - i, cmd, term);
 	if (!*numlen)
 		return -1;
 
-	DEBUGP("Match succeeded!\n");
+	pr_debug("Match succeeded!\n");
 	return 1;
 }
 
@@ -373,7 +367,7 @@ static int help(struct sk_buff **pskb,
 	/* Until there's been traffic both ways, don't look in packets. */
 	if (ctinfo != IP_CT_ESTABLISHED
 	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
-		DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
+		pr_debug("ftp: Conntrackinfo = %u\n", ctinfo);
 		return NF_ACCEPT;
 	}
 
@@ -384,8 +378,8 @@ static int help(struct sk_buff **pskb,
 	dataoff = protoff + th->doff * 4;
 	/* No data? */
 	if (dataoff >= (*pskb)->len) {
-		DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
-			(*pskb)->len);
+		pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
+			 (*pskb)->len);
 		return NF_ACCEPT;
 	}
 	datalen = (*pskb)->len - dataoff;
@@ -400,11 +394,11 @@ static int help(struct sk_buff **pskb,
 	/* Look up to see if we're just after a \n. */
 	if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
 		/* Now if this ends in \n, update ftp info. */
-		DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
-		       ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
-		       ct_ftp_info->seq_aft_nl[dir][0],
-		       ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)",
-		       ct_ftp_info->seq_aft_nl[dir][1]);
+		pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n",
+			 ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
+			 ct_ftp_info->seq_aft_nl[dir][0],
+			 ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)",
+			 ct_ftp_info->seq_aft_nl[dir][1]);
 		ret = NF_ACCEPT;
 		goto out_update_nl;
 	}
@@ -442,9 +436,9 @@ static int help(struct sk_buff **pskb,
 		goto out_update_nl;
 	}
 
-	DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
-	       (int)matchlen, fb_ptr + matchoff,
-	       matchlen, ntohl(th->seq) + matchoff);
+	pr_debug("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
+		 matchlen, fb_ptr + matchoff,
+		 matchlen, ntohl(th->seq) + matchoff);
 
 	exp = nf_ct_expect_alloc(ct);
 	if (exp == NULL) {
@@ -466,14 +460,16 @@ static int help(struct sk_buff **pskb,
 		   different IP address.  Simply don't record it for
 		   NAT. */
 		if (cmd.l3num == PF_INET) {
-			DEBUGP("conntrack_ftp: NOT RECORDING: " NIPQUAD_FMT " != " NIPQUAD_FMT "\n",
-			       NIPQUAD(cmd.u3.ip),
-			       NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip));
+			pr_debug("conntrack_ftp: NOT RECORDING: " NIPQUAD_FMT
+				 " != " NIPQUAD_FMT "\n",
+				 NIPQUAD(cmd.u3.ip),
+				 NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip));
 		} else {
-			DEBUGP("conntrack_ftp: NOT RECORDING: " NIP6_FMT " != " NIP6_FMT "\n",
-			       NIP6(*((struct in6_addr *)cmd.u3.ip6)),
-			       NIP6(*((struct in6_addr *)ct->tuplehash[dir]
-							.tuple.src.u3.ip6)));
+			pr_debug("conntrack_ftp: NOT RECORDING: " NIP6_FMT
+				 " != " NIP6_FMT "\n",
+				 NIP6(*((struct in6_addr *)cmd.u3.ip6)),
+				 NIP6(*((struct in6_addr *)
+					ct->tuplehash[dir].tuple.src.u3.ip6)));
 		}
 
 		/* Thanks to Cristiano Lincoln Mattos
@@ -530,9 +526,9 @@ static void nf_conntrack_ftp_fini(void)
 			if (ftp[i][j].me == NULL)
 				continue;
 
-			DEBUGP("nf_ct_ftp: unregistering helper for pf: %d "
-			       "port: %d\n",
-				ftp[i][j].tuple.src.l3num, ports[i]);
+			pr_debug("nf_ct_ftp: unregistering helper for pf: %d "
+				 "port: %d\n",
+				 ftp[i][j].tuple.src.l3num, ports[i]);
 			nf_conntrack_helper_unregister(&ftp[i][j]);
 		}
 	}
@@ -571,9 +567,9 @@ static int __init nf_conntrack_ftp_init(void)
 				sprintf(tmpname, "ftp-%d", ports[i]);
 			ftp[i][j].name = tmpname;
 
-			DEBUGP("nf_ct_ftp: registering helper for pf: %d "
-			       "port: %d\n",
-				ftp[i][j].tuple.src.l3num, ports[i]);
+			pr_debug("nf_ct_ftp: registering helper for pf: %d "
+				 "port: %d\n",
+				 ftp[i][j].tuple.src.l3num, ports[i]);
 			ret = nf_conntrack_helper_register(&ftp[i][j]);
 			if (ret) {
 				printk("nf_ct_ftp: failed to register helper "
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index aa5ba99..a8a9dfb 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -31,12 +31,6 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <linux/netfilter/nf_conntrack_h323.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Parameters */
 static unsigned int default_rrq_ttl __read_mostly = 300;
 module_param(default_rrq_ttl, uint, 0600);
@@ -150,9 +144,9 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
 		if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
 			/* Netmeeting sends TPKT header and data separately */
 			if (info->tpkt_len[dir] > 0) {
-				DEBUGP("nf_ct_h323: previous packet "
-				       "indicated separate TPKT data of %hu "
-				       "bytes\n", info->tpkt_len[dir]);
+				pr_debug("nf_ct_h323: previous packet "
+					 "indicated separate TPKT data of %hu "
+					 "bytes\n", info->tpkt_len[dir]);
 				if (info->tpkt_len[dir] <= tcpdatalen) {
 					/* Yes, there was a TPKT header
 					 * received */
@@ -163,7 +157,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
 				}
 
 				/* Fragmented TPKT */
-				DEBUGP("nf_ct_h323: fragmented TPKT\n");
+				pr_debug("nf_ct_h323: fragmented TPKT\n");
 				goto clear_out;
 			}
 
@@ -190,9 +184,9 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
 	if (tpktlen > tcpdatalen) {
 		if (tcpdatalen == 4) {	/* Separate TPKT header */
 			/* Netmeeting sends TPKT header and data separately */
-			DEBUGP("nf_ct_h323: separate TPKT header indicates "
-			       "there will be TPKT data of %hu bytes\n",
-			       tpktlen - 4);
+			pr_debug("nf_ct_h323: separate TPKT header indicates "
+				 "there will be TPKT data of %hu bytes\n",
+				 tpktlen - 4);
 			info->tpkt_len[dir] = tpktlen - 4;
 			return 0;
 		}
@@ -308,9 +302,9 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(rtp_exp) == 0) {
 			if (nf_ct_expect_related(rtcp_exp) == 0) {
-				DEBUGP("nf_ct_h323: expect RTP ");
+				pr_debug("nf_ct_h323: expect RTP ");
 				NF_CT_DUMP_TUPLE(&rtp_exp->tuple);
-				DEBUGP("nf_ct_h323: expect RTCP ");
+				pr_debug("nf_ct_h323: expect RTCP ");
 				NF_CT_DUMP_TUPLE(&rtcp_exp->tuple);
 			} else {
 				nf_ct_unexpect_related(rtp_exp);
@@ -365,7 +359,7 @@ static int expect_t120(struct sk_buff **pskb,
 			       port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
-			DEBUGP("nf_ct_h323: expect T.120 ");
+			pr_debug("nf_ct_h323: expect T.120 ");
 			NF_CT_DUMP_TUPLE(&exp->tuple);
 		} else
 			ret = -1;
@@ -413,7 +407,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
 {
 	int ret;
 
-	DEBUGP("nf_ct_h323: OpenLogicalChannel\n");
+	pr_debug("nf_ct_h323: OpenLogicalChannel\n");
 
 	if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
 	    eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
@@ -473,7 +467,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
 	H2250LogicalChannelAckParameters *ack;
 	int ret;
 
-	DEBUGP("nf_ct_h323: OpenLogicalChannelAck\n");
+	pr_debug("nf_ct_h323: OpenLogicalChannelAck\n");
 
 	if ((olca->options &
 	     eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
@@ -544,8 +538,8 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
 			return process_olc(pskb, ct, ctinfo, data, dataoff,
 					   &mscm->request.openLogicalChannel);
 		}
-		DEBUGP("nf_ct_h323: H.245 Request %d\n",
-		       mscm->request.choice);
+		pr_debug("nf_ct_h323: H.245 Request %d\n",
+			 mscm->request.choice);
 		break;
 	case eMultimediaSystemControlMessage_response:
 		if (mscm->response.choice ==
@@ -554,11 +548,11 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
 					    &mscm->response.
 					    openLogicalChannelAck);
 		}
-		DEBUGP("nf_ct_h323: H.245 Response %d\n",
-		       mscm->response.choice);
+		pr_debug("nf_ct_h323: H.245 Response %d\n",
+			 mscm->response.choice);
 		break;
 	default:
-		DEBUGP("nf_ct_h323: H.245 signal %d\n", mscm->choice);
+		pr_debug("nf_ct_h323: H.245 signal %d\n", mscm->choice);
 		break;
 	}
 
@@ -580,23 +574,23 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff,
 	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
 		return NF_ACCEPT;
 	}
-	DEBUGP("nf_ct_h245: skblen = %u\n", (*pskb)->len);
+	pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len);
 
 	spin_lock_bh(&nf_h323_lock);
 
 	/* Process each TPKT */
 	while (get_tpkt_data(pskb, protoff, ct, ctinfo,
 			     &data, &datalen, &dataoff)) {
-		DEBUGP("nf_ct_h245: TPKT len=%d ", datalen);
+		pr_debug("nf_ct_h245: TPKT len=%d ", datalen);
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
 
 		/* Decode H.245 signal */
 		ret = DecodeMultimediaSystemControlMessage(data, datalen,
 							   &mscm);
 		if (ret < 0) {
-			DEBUGP("nf_ct_h245: decoding error: %s\n",
-			       ret == H323_ERROR_BOUND ?
-			       "out of bound" : "out of range");
+			pr_debug("nf_ct_h245: decoding error: %s\n",
+				 ret == H323_ERROR_BOUND ?
+				 "out of bound" : "out of range");
 			/* We don't drop when decoding error */
 			break;
 		}
@@ -697,7 +691,7 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
 			       port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
-			DEBUGP("nf_ct_q931: expect H.245 ");
+			pr_debug("nf_ct_q931: expect H.245 ");
 			NF_CT_DUMP_TUPLE(&exp->tuple);
 		} else
 			ret = -1;
@@ -786,7 +780,7 @@ static int expect_callforwarding(struct sk_buff **pskb,
 	if (callforward_filter &&
 	    callforward_do_filter(&addr, &ct->tuplehash[!dir].tuple.src.u3,
 				  ct->tuplehash[!dir].tuple.src.l3num)) {
-		DEBUGP("nf_ct_q931: Call Forwarding not tracked\n");
+		pr_debug("nf_ct_q931: Call Forwarding not tracked\n");
 		return 0;
 	}
 
@@ -808,7 +802,7 @@ static int expect_callforwarding(struct sk_buff **pskb,
 					 taddr, port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
-			DEBUGP("nf_ct_q931: expect Call Forwarding ");
+			pr_debug("nf_ct_q931: expect Call Forwarding ");
 			NF_CT_DUMP_TUPLE(&exp->tuple);
 		} else
 			ret = -1;
@@ -832,7 +826,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
 	union nf_conntrack_address addr;
 	typeof(set_h225_addr_hook) set_h225_addr;
 
-	DEBUGP("nf_ct_q931: Setup\n");
+	pr_debug("nf_ct_q931: Setup\n");
 
 	if (setup->options & eSetup_UUIE_h245Address) {
 		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
@@ -847,11 +841,11 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
 	    get_h225_addr(ct, *data, &setup->destCallSignalAddress,
 			  &addr, &port) &&
 	    memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) {
-		DEBUGP("nf_ct_q931: set destCallSignalAddress "
-		       NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n",
-		       NIP6(*(struct in6_addr *)&addr), ntohs(port),
-		       NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3),
-		       ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
+		pr_debug("nf_ct_q931: set destCallSignalAddress "
+			 NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n",
+			 NIP6(*(struct in6_addr *)&addr), ntohs(port),
+			 NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3),
+			 ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
 		ret = set_h225_addr(pskb, data, dataoff,
 				    &setup->destCallSignalAddress,
 				    &ct->tuplehash[!dir].tuple.src.u3,
@@ -865,11 +859,11 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
 	    get_h225_addr(ct, *data, &setup->sourceCallSignalAddress,
 			  &addr, &port) &&
 	    memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) {
-		DEBUGP("nf_ct_q931: set sourceCallSignalAddress "
-		       NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n",
-		       NIP6(*(struct in6_addr *)&addr), ntohs(port),
-		       NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3),
-		       ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
+		pr_debug("nf_ct_q931: set sourceCallSignalAddress "
+			 NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n",
+			 NIP6(*(struct in6_addr *)&addr), ntohs(port),
+			 NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3),
+			 ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
 		ret = set_h225_addr(pskb, data, dataoff,
 				    &setup->sourceCallSignalAddress,
 				    &ct->tuplehash[!dir].tuple.dst.u3,
@@ -900,7 +894,7 @@ static int process_callproceeding(struct sk_buff **pskb,
 	int ret;
 	int i;
 
-	DEBUGP("nf_ct_q931: CallProceeding\n");
+	pr_debug("nf_ct_q931: CallProceeding\n");
 
 	if (callproc->options & eCallProceeding_UUIE_h245Address) {
 		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
@@ -930,7 +924,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
 	int ret;
 	int i;
 
-	DEBUGP("nf_ct_q931: Connect\n");
+	pr_debug("nf_ct_q931: Connect\n");
 
 	if (connect->options & eConnect_UUIE_h245Address) {
 		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
@@ -960,7 +954,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
 	int ret;
 	int i;
 
-	DEBUGP("nf_ct_q931: Alerting\n");
+	pr_debug("nf_ct_q931: Alerting\n");
 
 	if (alert->options & eAlerting_UUIE_h245Address) {
 		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
@@ -990,7 +984,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
 	int ret;
 	int i;
 
-	DEBUGP("nf_ct_q931: Facility\n");
+	pr_debug("nf_ct_q931: Facility\n");
 
 	if (facility->reason.choice == eFacilityReason_callForwarded) {
 		if (facility->options & eFacility_UUIE_alternativeAddress)
@@ -1029,7 +1023,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
 	int ret;
 	int i;
 
-	DEBUGP("nf_ct_q931: Progress\n");
+	pr_debug("nf_ct_q931: Progress\n");
 
 	if (progress->options & eProgress_UUIE_h245Address) {
 		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
@@ -1086,8 +1080,8 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
 				       &pdu->h323_message_body.progress);
 		break;
 	default:
-		DEBUGP("nf_ct_q931: Q.931 signal %d\n",
-		       pdu->h323_message_body.choice);
+		pr_debug("nf_ct_q931: Q.931 signal %d\n",
+			 pdu->h323_message_body.choice);
 		break;
 	}
 
@@ -1121,22 +1115,22 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff,
 	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
 		return NF_ACCEPT;
 	}
-	DEBUGP("nf_ct_q931: skblen = %u\n", (*pskb)->len);
+	pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len);
 
 	spin_lock_bh(&nf_h323_lock);
 
 	/* Process each TPKT */
 	while (get_tpkt_data(pskb, protoff, ct, ctinfo,
 			     &data, &datalen, &dataoff)) {
-		DEBUGP("nf_ct_q931: TPKT len=%d ", datalen);
+		pr_debug("nf_ct_q931: TPKT len=%d ", datalen);
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
 
 		/* Decode Q.931 signal */
 		ret = DecodeQ931(data, datalen, &q931);
 		if (ret < 0) {
-			DEBUGP("nf_ct_q931: decoding error: %s\n",
-			       ret == H323_ERROR_BOUND ?
-			       "out of bound" : "out of range");
+			pr_debug("nf_ct_q931: decoding error: %s\n",
+				 ret == H323_ERROR_BOUND ?
+				 "out of bound" : "out of range");
 			/* We don't drop when decoding error */
 			break;
 		}
@@ -1274,7 +1268,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
 		ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
-			DEBUGP("nf_ct_ras: expect Q.931 ");
+			pr_debug("nf_ct_ras: expect Q.931 ");
 			NF_CT_DUMP_TUPLE(&exp->tuple);
 
 			/* Save port for looking up expect in processing RCF */
@@ -1295,7 +1289,7 @@ static int process_grq(struct sk_buff **pskb, struct nf_conn *ct,
 {
 	typeof(set_ras_addr_hook) set_ras_addr;
 
-	DEBUGP("nf_ct_ras: GRQ\n");
+	pr_debug("nf_ct_ras: GRQ\n");
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK)	/* NATed */
@@ -1315,7 +1309,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
 	union nf_conntrack_address addr;
 	struct nf_conntrack_expect *exp;
 
-	DEBUGP("nf_ct_ras: GCF\n");
+	pr_debug("nf_ct_ras: GCF\n");
 
 	if (!get_h225_addr(ct, *data, &gcf->rasAddress, &addr, &port))
 		return 0;
@@ -1338,7 +1332,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
 	exp->helper = nf_conntrack_helper_ras;
 
 	if (nf_ct_expect_related(exp) == 0) {
-		DEBUGP("nf_ct_ras: expect RAS ");
+		pr_debug("nf_ct_ras: expect RAS ");
 		NF_CT_DUMP_TUPLE(&exp->tuple);
 	} else
 		ret = -1;
@@ -1357,7 +1351,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
 	int ret;
 	typeof(set_ras_addr_hook) set_ras_addr;
 
-	DEBUGP("nf_ct_ras: RRQ\n");
+	pr_debug("nf_ct_ras: RRQ\n");
 
 	ret = expect_q931(pskb, ct, ctinfo, data,
 			  rrq->callSignalAddress.item,
@@ -1375,7 +1369,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	if (rrq->options & eRegistrationRequest_timeToLive) {
-		DEBUGP("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
+		pr_debug("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
 		info->timeout = rrq->timeToLive;
 	} else
 		info->timeout = default_rrq_ttl;
@@ -1394,7 +1388,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
 	struct nf_conntrack_expect *exp;
 	typeof(set_sig_addr_hook) set_sig_addr;
 
-	DEBUGP("nf_ct_ras: RCF\n");
+	pr_debug("nf_ct_ras: RCF\n");
 
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && ct->status & IPS_NAT_MASK) {
@@ -1406,14 +1400,13 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	if (rcf->options & eRegistrationConfirm_timeToLive) {
-		DEBUGP("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
+		pr_debug("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
 		info->timeout = rcf->timeToLive;
 	}
 
 	if (info->timeout > 0) {
-		DEBUGP
-		    ("nf_ct_ras: set RAS connection timeout to %u seconds\n",
-		     info->timeout);
+		pr_debug("nf_ct_ras: set RAS connection timeout to "
+			 "%u seconds\n", info->timeout);
 		nf_ct_refresh(ct, *pskb, info->timeout * HZ);
 
 		/* Set expect timeout */
@@ -1421,9 +1414,9 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
 		exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
 				  info->sig_port[!dir]);
 		if (exp) {
-			DEBUGP("nf_ct_ras: set Q.931 expect "
-			       "timeout to %u seconds for",
-			       info->timeout);
+			pr_debug("nf_ct_ras: set Q.931 expect "
+				 "timeout to %u seconds for",
+				 info->timeout);
 			NF_CT_DUMP_TUPLE(&exp->tuple);
 			set_expect_timeout(exp, info->timeout);
 		}
@@ -1443,7 +1436,7 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
 	int ret;
 	typeof(set_sig_addr_hook) set_sig_addr;
 
-	DEBUGP("nf_ct_ras: URQ\n");
+	pr_debug("nf_ct_ras: URQ\n");
 
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && ct->status & IPS_NAT_MASK) {
@@ -1476,7 +1469,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
 	union nf_conntrack_address addr;
 	typeof(set_h225_addr_hook) set_h225_addr;
 
-	DEBUGP("nf_ct_ras: ARQ\n");
+	pr_debug("nf_ct_ras: ARQ\n");
 
 	set_h225_addr = rcu_dereference(set_h225_addr_hook);
 	if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
@@ -1519,7 +1512,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
 	struct nf_conntrack_expect *exp;
 	typeof(set_sig_addr_hook) set_sig_addr;
 
-	DEBUGP("nf_ct_ras: ACF\n");
+	pr_debug("nf_ct_ras: ACF\n");
 
 	if (!get_h225_addr(ct, *data, &acf->destCallSignalAddress,
 			   &addr, &port))
@@ -1544,7 +1537,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
 	exp->helper = nf_conntrack_helper_q931;
 
 	if (nf_ct_expect_related(exp) == 0) {
-		DEBUGP("nf_ct_ras: expect Q.931 ");
+		pr_debug("nf_ct_ras: expect Q.931 ");
 		NF_CT_DUMP_TUPLE(&exp->tuple);
 	} else
 		ret = -1;
@@ -1561,7 +1554,7 @@ static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct,
 {
 	typeof(set_ras_addr_hook) set_ras_addr;
 
-	DEBUGP("nf_ct_ras: LRQ\n");
+	pr_debug("nf_ct_ras: LRQ\n");
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK)
@@ -1581,7 +1574,7 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
 	union nf_conntrack_address addr;
 	struct nf_conntrack_expect *exp;
 
-	DEBUGP("nf_ct_ras: LCF\n");
+	pr_debug("nf_ct_ras: LCF\n");
 
 	if (!get_h225_addr(ct, *data, &lcf->callSignalAddress,
 			   &addr, &port))
@@ -1597,7 +1590,7 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
 	exp->helper = nf_conntrack_helper_q931;
 
 	if (nf_ct_expect_related(exp) == 0) {
-		DEBUGP("nf_ct_ras: expect Q.931 ");
+		pr_debug("nf_ct_ras: expect Q.931 ");
 		NF_CT_DUMP_TUPLE(&exp->tuple);
 	} else
 		ret = -1;
@@ -1618,7 +1611,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
 	typeof(set_ras_addr_hook) set_ras_addr;
 	typeof(set_sig_addr_hook) set_sig_addr;
 
-	DEBUGP("nf_ct_ras: IRR\n");
+	pr_debug("nf_ct_ras: IRR\n");
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK) {
@@ -1677,7 +1670,7 @@ static int process_ras(struct sk_buff **pskb, struct nf_conn *ct,
 		return process_irr(pskb, ct, ctinfo, data,
 				   &ras->infoRequestResponse);
 	default:
-		DEBUGP("nf_ct_ras: RAS message %d\n", ras->choice);
+		pr_debug("nf_ct_ras: RAS message %d\n", ras->choice);
 		break;
 	}
 
@@ -1693,7 +1686,7 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff,
 	int datalen = 0;
 	int ret;
 
-	DEBUGP("nf_ct_ras: skblen = %u\n", (*pskb)->len);
+	pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len);
 
 	spin_lock_bh(&nf_h323_lock);
 
@@ -1701,15 +1694,15 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff,
 	data = get_udp_data(pskb, protoff, &datalen);
 	if (data == NULL)
 		goto accept;
-	DEBUGP("nf_ct_ras: RAS message len=%d ", datalen);
+	pr_debug("nf_ct_ras: RAS message len=%d ", datalen);
 	NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
 
 	/* Decode RAS message */
 	ret = DecodeRasMessage(data, datalen, &ras);
 	if (ret < 0) {
-		DEBUGP("nf_ct_ras: decoding error: %s\n",
-		       ret == H323_ERROR_BOUND ?
-		       "out of bound" : "out of range");
+		pr_debug("nf_ct_ras: decoding error: %s\n",
+			 ret == H323_ERROR_BOUND ?
+			 "out of bound" : "out of range");
 		goto accept;
 	}
 
@@ -1760,7 +1753,7 @@ static void __exit nf_conntrack_h323_fini(void)
 	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
 	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
 	kfree(h323_buffer);
-	DEBUGP("nf_ct_h323: fini\n");
+	pr_debug("nf_ct_h323: fini\n");
 }
 
 /****************************************************************************/
@@ -1783,7 +1776,7 @@ static int __init nf_conntrack_h323_init(void)
 	ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]);
 	if (ret < 0)
 		goto err4;
-	DEBUGP("nf_ct_h323: init success\n");
+	pr_debug("nf_ct_h323: init success\n");
 	return 0;
 
 err4:
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 8c73407..1562ca9 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -12,6 +12,7 @@
 #include <linux/moduleparam.h>
 #include <linux/skbuff.h>
 #include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/netfilter.h>
 
@@ -55,13 +56,6 @@ static const char *dccprotos[] = {
 
 #define MINMATCHLEN	5
 
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
-				       __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* tries to get the ip_addr and port out of a dcc command
  * return value: -1 on failure, 0 on success
  *	data		pointer to first byte of DCC command data
@@ -99,6 +93,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	unsigned int dataoff;
+	struct iphdr *iph;
 	struct tcphdr _tcph, *th;
 	char *data, *data_limit, *ib_ptr;
 	int dir = CTINFO2DIR(ctinfo);
@@ -148,9 +143,10 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 		data += 5;
 		/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
 
-		DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
-			NIPQUAD(iph->saddr), ntohs(th->source),
-			NIPQUAD(iph->daddr), ntohs(th->dest));
+		iph = ip_hdr(*pskb);
+		pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n",
+			 NIPQUAD(iph->saddr), ntohs(th->source),
+			 NIPQUAD(iph->daddr), ntohs(th->dest));
 
 		for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
 			if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
@@ -158,18 +154,18 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 				continue;
 			}
 			data += strlen(dccprotos[i]);
-			DEBUGP("DCC %s detected\n", dccprotos[i]);
+			pr_debug("DCC %s detected\n", dccprotos[i]);
 
 			/* we have at least
 			 * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
 			 * data left (== 14/13 bytes) */
 			if (parse_dcc((char *)data, data_limit, &dcc_ip,
 				       &dcc_port, &addr_beg_p, &addr_end_p)) {
-				DEBUGP("unable to parse dcc command\n");
+				pr_debug("unable to parse dcc command\n");
 				continue;
 			}
-			DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
-				HIPQUAD(dcc_ip), dcc_port);
+			pr_debug("DCC bound ip/port: %u.%u.%u.%u:%u\n",
+				 HIPQUAD(dcc_ip), dcc_port);
 
 			/* dcc_ip can be the internal OR external (NAT'ed) IP */
 			tuple = &ct->tuplehash[dir].tuple;
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index 2fd0f11..b1bfa20 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -31,12 +31,6 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
 				struct nf_conntrack_tuple *tuple)
 {
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 63dac5e..b080419 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -65,7 +65,7 @@ void
 			     struct nf_conntrack_expect *exp) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
 
-#if 0
+#ifdef DEBUG
 /* PptpControlMessageType names */
 const char *pptp_msg_name[] = {
 	"UNKNOWN_MESSAGE",
@@ -86,9 +86,6 @@ const char *pptp_msg_name[] = {
 	"SET_LINK_INFO"
 };
 EXPORT_SYMBOL(pptp_msg_name);
-#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
 #endif
 
 #define SECS *HZ
@@ -102,7 +99,7 @@ static void pptp_expectfn(struct nf_conn *ct,
 			 struct nf_conntrack_expect *exp)
 {
 	typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
-	DEBUGP("increasing timeouts\n");
+	pr_debug("increasing timeouts\n");
 
 	/* increase timeout of GRE data channel conntrack entry */
 	ct->proto.gre.timeout	     = PPTP_GRE_TIMEOUT;
@@ -121,17 +118,17 @@ static void pptp_expectfn(struct nf_conn *ct,
 
 		/* obviously this tuple inversion only works until you do NAT */
 		nf_ct_invert_tuplepr(&inv_t, &exp->tuple);
-		DEBUGP("trying to unexpect other dir: ");
+		pr_debug("trying to unexpect other dir: ");
 		NF_CT_DUMP_TUPLE(&inv_t);
 
 		exp_other = nf_ct_expect_find_get(&inv_t);
 		if (exp_other) {
 			/* delete other expectation.  */
-			DEBUGP("found\n");
+			pr_debug("found\n");
 			nf_ct_unexpect_related(exp_other);
 			nf_ct_expect_put(exp_other);
 		} else {
-			DEBUGP("not found\n");
+			pr_debug("not found\n");
 		}
 	}
 	rcu_read_unlock();
@@ -143,13 +140,13 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 	struct nf_conntrack_expect *exp;
 	struct nf_conn *sibling;
 
-	DEBUGP("trying to timeout ct or exp for tuple ");
+	pr_debug("trying to timeout ct or exp for tuple ");
 	NF_CT_DUMP_TUPLE(t);
 
 	h = nf_conntrack_find_get(t);
 	if (h)  {
 		sibling = nf_ct_tuplehash_to_ctrack(h);
-		DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
+		pr_debug("setting timeout of conntrack %p to 0\n", sibling);
 		sibling->proto.gre.timeout	  = 0;
 		sibling->proto.gre.stream_timeout = 0;
 		if (del_timer(&sibling->timeout))
@@ -159,7 +156,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 	} else {
 		exp = nf_ct_expect_find_get(t);
 		if (exp) {
-			DEBUGP("unexpect_related of expect %p\n", exp);
+			pr_debug("unexpect_related of expect %p\n", exp);
 			nf_ct_unexpect_related(exp);
 			nf_ct_expect_put(exp);
 			return 1;
@@ -182,7 +179,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id;
 	if (!destroy_sibling_or_exp(&t))
-		DEBUGP("failed to timeout original pns->pac ct/exp\n");
+		pr_debug("failed to timeout original pns->pac ct/exp\n");
 
 	/* try reply (pac->pns) tuple */
 	memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
@@ -190,7 +187,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id;
 	if (!destroy_sibling_or_exp(&t))
-		DEBUGP("failed to timeout reply pac->pns ct/exp\n");
+		pr_debug("failed to timeout reply pac->pns ct/exp\n");
 }
 
 /* expect GRE connections (PNS->PAC and PAC->PNS direction) */
@@ -270,7 +267,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound;
 
 	msg = ntohs(ctlh->messageType);
-	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
+	pr_debug("inbound control message %s\n", pptp_msg_name[msg]);
 
 	switch (msg) {
 	case PPTP_START_SESSION_REPLY:
@@ -305,8 +302,8 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		pcid = pptpReq->ocack.peersCallID;
 		if (info->pns_call_id != pcid)
 			goto invalid;
-		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
-			ntohs(cid), ntohs(pcid));
+		pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+			 ntohs(cid), ntohs(pcid));
 
 		if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
 			info->cstate = PPTP_CALL_OUT_CONF;
@@ -322,7 +319,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 			goto invalid;
 
 		cid = pptpReq->icreq.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+		pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
 		info->cstate = PPTP_CALL_IN_REQ;
 		info->pac_call_id = cid;
 		break;
@@ -341,7 +338,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		if (info->pns_call_id != pcid)
 			goto invalid;
 
-		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
+		pr_debug("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
 		info->cstate = PPTP_CALL_IN_CONF;
 
 		/* we expect a GRE connection from PAC to PNS */
@@ -351,7 +348,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	case PPTP_CALL_DISCONNECT_NOTIFY:
 		/* server confirms disconnect */
 		cid = pptpReq->disc.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+		pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
 		info->cstate = PPTP_CALL_NONE;
 
 		/* untrack this call id, unexpect GRE packets */
@@ -374,11 +371,11 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	return NF_ACCEPT;
 
 invalid:
-	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
-	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
-	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
-	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
-	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+	pr_debug("invalid %s: type=%d cid=%u pcid=%u "
+		 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+		 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+		 msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
+		 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
 	return NF_ACCEPT;
 }
 
@@ -396,7 +393,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound;
 
 	msg = ntohs(ctlh->messageType);
-	DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
+	pr_debug("outbound control message %s\n", pptp_msg_name[msg]);
 
 	switch (msg) {
 	case PPTP_START_SESSION_REQUEST:
@@ -418,7 +415,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		info->cstate = PPTP_CALL_OUT_REQ;
 		/* track PNS call id */
 		cid = pptpReq->ocreq.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+		pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
 		info->pns_call_id = cid;
 		break;
 
@@ -432,8 +429,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		pcid = pptpReq->icack.peersCallID;
 		if (info->pac_call_id != pcid)
 			goto invalid;
-		DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
-		       ntohs(cid), ntohs(pcid));
+		pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
+			 ntohs(cid), ntohs(pcid));
 
 		if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
 			/* part two of the three-way handshake */
@@ -469,11 +466,11 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	return NF_ACCEPT;
 
 invalid:
-	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
-	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
-	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
-	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
-	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+	pr_debug("invalid %s: type=%d cid=%u pcid=%u "
+		 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+		 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+		 msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
+		 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
 	return NF_ACCEPT;
 }
 
@@ -524,7 +521,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
 
 	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
 	if (!pptph) {
-		DEBUGP("no full PPTP header, can't track\n");
+		pr_debug("no full PPTP header, can't track\n");
 		return NF_ACCEPT;
 	}
 	nexthdr_off += sizeof(_pptph);
@@ -533,7 +530,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
 	/* if it's not a control message we can't do anything with it */
 	if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
 	    ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
-		DEBUGP("not a control packet\n");
+		pr_debug("not a control packet\n");
 		return NF_ACCEPT;
 	}
 
@@ -569,8 +566,8 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
 		/* server -> client (PAC -> PNS) */
 		ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
 				       ctinfo);
-	DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
-		oldsstate, info->sstate, oldcstate, info->cstate);
+	pr_debug("sstate: %d->%d, cstate: %d->%d\n",
+		 oldsstate, info->sstate, oldcstate, info->cstate);
 	spin_unlock_bh(&nf_pptp_lock);
 
 	return ret;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 339c397..771c4c2 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -40,12 +40,6 @@
 #define GRE_TIMEOUT		(30 * HZ)
 #define GRE_STREAM_TIMEOUT	(180 * HZ)
 
-#if 0
-#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
 static DEFINE_RWLOCK(nf_ct_gre_lock);
 static LIST_HEAD(gre_keymap_list);
 
@@ -87,7 +81,7 @@ static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t)
 	}
 	read_unlock_bh(&nf_ct_gre_lock);
 
-	DEBUGP("lookup src key 0x%x for ", key);
+	pr_debug("lookup src key 0x%x for ", key);
 	NF_CT_DUMP_TUPLE(t);
 
 	return key;
@@ -107,8 +101,8 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 			if (gre_key_cmpfn(km, t) && km == *kmp)
 				return 0;
 		}
-		DEBUGP("trying to override keymap_%s for ct %p\n",
-			dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
+		pr_debug("trying to override keymap_%s for ct %p\n",
+			 dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
 		return -EEXIST;
 	}
 
@@ -118,7 +112,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 	memcpy(&km->tuple, t, sizeof(*t));
 	*kmp = km;
 
-	DEBUGP("adding new entry %p: ", km);
+	pr_debug("adding new entry %p: ", km);
 	NF_CT_DUMP_TUPLE(&km->tuple);
 
 	write_lock_bh(&nf_ct_gre_lock);
@@ -135,13 +129,13 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
 	struct nf_conn_help *help = nfct_help(ct);
 	enum ip_conntrack_dir dir;
 
-	DEBUGP("entering for ct %p\n", ct);
+	pr_debug("entering for ct %p\n", ct);
 
 	write_lock_bh(&nf_ct_gre_lock);
 	for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
 		if (help->help.ct_pptp_info.keymap[dir]) {
-			DEBUGP("removing %p from list\n",
-				help->help.ct_pptp_info.keymap[dir]);
+			pr_debug("removing %p from list\n",
+				 help->help.ct_pptp_info.keymap[dir]);
 			list_del(&help->help.ct_pptp_info.keymap[dir]->list);
 			kfree(help->help.ct_pptp_info.keymap[dir]);
 			help->help.ct_pptp_info.keymap[dir] = NULL;
@@ -186,7 +180,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb,
 		return 1;
 
 	if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
-		DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
+		pr_debug("GRE_VERSION_PPTP but unknown proto\n");
 		return 0;
 	}
 
@@ -242,7 +236,7 @@ static int gre_packet(struct nf_conn *ct,
 static int gre_new(struct nf_conn *ct, const struct sk_buff *skb,
 		   unsigned int dataoff)
 {
-	DEBUGP(": ");
+	pr_debug(": ");
 	NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 
 	/* initialize to sane value.  Ideally a conntrack helper
@@ -258,10 +252,10 @@ static int gre_new(struct nf_conn *ct, const struct sk_buff *skb,
 static void gre_destroy(struct nf_conn *ct)
 {
 	struct nf_conn *master = ct->master;
-	DEBUGP(" entering\n");
+	pr_debug(" entering\n");
 
 	if (!master)
-		DEBUGP("no master !?!\n");
+		pr_debug("no master !?!\n");
 	else
 		nf_ct_gre_keymap_destroy(master);
 }
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 0d3254b..265769e 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -25,12 +25,6 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 
-#if 0
-#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Protects conntrack->proto.sctp */
 static DEFINE_RWLOCK(sctp_lock);
 
@@ -151,9 +145,6 @@ static int sctp_pkt_to_tuple(const struct sk_buff *skb,
 {
 	sctp_sctphdr_t _hdr, *hp;
 
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
 	/* Actually only need first 8 bytes. */
 	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
 	if (hp == NULL)
@@ -167,9 +158,6 @@ static int sctp_pkt_to_tuple(const struct sk_buff *skb,
 static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
 			     const struct nf_conntrack_tuple *orig)
 {
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
 	tuple->src.u.sctp.port = orig->dst.u.sctp.port;
 	tuple->dst.u.sctp.port = orig->src.u.sctp.port;
 	return 1;
@@ -179,9 +167,6 @@ static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
 static int sctp_print_tuple(struct seq_file *s,
 			    const struct nf_conntrack_tuple *tuple)
 {
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
 	return seq_printf(s, "sport=%hu dport=%hu ",
 			  ntohs(tuple->src.u.sctp.port),
 			  ntohs(tuple->dst.u.sctp.port));
@@ -193,9 +178,6 @@ static int sctp_print_conntrack(struct seq_file *s,
 {
 	enum sctp_conntrack state;
 
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
 	read_lock_bh(&sctp_lock);
 	state = conntrack->proto.sctp.state;
 	read_unlock_bh(&sctp_lock);
@@ -219,13 +201,10 @@ static int do_basic_checks(struct nf_conn *conntrack,
 	sctp_chunkhdr_t _sch, *sch;
 	int flag;
 
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
 	flag = 0;
 
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
-		DEBUGP("Chunk Num: %d  Type: %d\n", count, sch->type);
+		pr_debug("Chunk Num: %d  Type: %d\n", count, sch->type);
 
 		if (sch->type == SCTP_CID_INIT
 			|| sch->type == SCTP_CID_INIT_ACK
@@ -242,7 +221,7 @@ static int do_basic_checks(struct nf_conn *conntrack,
 			|| sch->type == SCTP_CID_COOKIE_ECHO
 			|| flag)
 		      && count !=0) || !sch->length) {
-			DEBUGP("Basic checks failed\n");
+			pr_debug("Basic checks failed\n");
 			return 1;
 		}
 
@@ -251,7 +230,7 @@ static int do_basic_checks(struct nf_conn *conntrack,
 		}
 	}
 
-	DEBUGP("Basic checks passed\n");
+	pr_debug("Basic checks passed\n");
 	return count == 0;
 }
 
@@ -261,50 +240,47 @@ static int new_state(enum ip_conntrack_dir dir,
 {
 	int i;
 
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	DEBUGP("Chunk type: %d\n", chunk_type);
+	pr_debug("Chunk type: %d\n", chunk_type);
 
 	switch (chunk_type) {
 		case SCTP_CID_INIT:
-			DEBUGP("SCTP_CID_INIT\n");
+			pr_debug("SCTP_CID_INIT\n");
 			i = 0; break;
 		case SCTP_CID_INIT_ACK:
-			DEBUGP("SCTP_CID_INIT_ACK\n");
+			pr_debug("SCTP_CID_INIT_ACK\n");
 			i = 1; break;
 		case SCTP_CID_ABORT:
-			DEBUGP("SCTP_CID_ABORT\n");
+			pr_debug("SCTP_CID_ABORT\n");
 			i = 2; break;
 		case SCTP_CID_SHUTDOWN:
-			DEBUGP("SCTP_CID_SHUTDOWN\n");
+			pr_debug("SCTP_CID_SHUTDOWN\n");
 			i = 3; break;
 		case SCTP_CID_SHUTDOWN_ACK:
-			DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
+			pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
 			i = 4; break;
 		case SCTP_CID_ERROR:
-			DEBUGP("SCTP_CID_ERROR\n");
+			pr_debug("SCTP_CID_ERROR\n");
 			i = 5; break;
 		case SCTP_CID_COOKIE_ECHO:
-			DEBUGP("SCTP_CID_COOKIE_ECHO\n");
+			pr_debug("SCTP_CID_COOKIE_ECHO\n");
 			i = 6; break;
 		case SCTP_CID_COOKIE_ACK:
-			DEBUGP("SCTP_CID_COOKIE_ACK\n");
+			pr_debug("SCTP_CID_COOKIE_ACK\n");
 			i = 7; break;
 		case SCTP_CID_SHUTDOWN_COMPLETE:
-			DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
+			pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
 			i = 8; break;
 		default:
 			/* Other chunks like DATA, SACK, HEARTBEAT and
 			its ACK do not cause a change in state */
-			DEBUGP("Unknown chunk type, Will stay in %s\n",
-						sctp_conntrack_names[cur_state]);
+			pr_debug("Unknown chunk type, Will stay in %s\n",
+				 sctp_conntrack_names[cur_state]);
 			return cur_state;
 	}
 
-	DEBUGP("dir: %d   cur_state: %s  chunk_type: %d  new_state: %s\n",
-			dir, sctp_conntrack_names[cur_state], chunk_type,
-			sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
+	pr_debug("dir: %d   cur_state: %s  chunk_type: %d  new_state: %s\n",
+		 dir, sctp_conntrack_names[cur_state], chunk_type,
+		 sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
 
 	return sctp_conntracks[dir][i][cur_state];
 }
@@ -323,9 +299,6 @@ static int sctp_packet(struct nf_conn *conntrack,
 	u_int32_t offset, count;
 	char map[256 / sizeof (char)] = {0};
 
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
 	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
 	if (sh == NULL)
 		return -1;
@@ -340,7 +313,7 @@ static int sctp_packet(struct nf_conn *conntrack,
 		&& !test_bit(SCTP_CID_ABORT, (void *)map)
 		&& !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
 		&& (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
-		DEBUGP("Verification tag check failed\n");
+		pr_debug("Verification tag check failed\n");
 		return -1;
 	}
 
@@ -385,8 +358,9 @@ static int sctp_packet(struct nf_conn *conntrack,
 
 		/* Invalid */
 		if (newconntrack == SCTP_CONNTRACK_MAX) {
-			DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
-			       CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
+			pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
+				 "conntrack=%u\n",
+				 CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
 			write_unlock_bh(&sctp_lock);
 			return -1;
 		}
@@ -402,8 +376,8 @@ static int sctp_packet(struct nf_conn *conntrack,
 					write_unlock_bh(&sctp_lock);
 					return -1;
 			}
-			DEBUGP("Setting vtag %x for dir %d\n",
-					ih->init_tag, !CTINFO2DIR(ctinfo));
+			pr_debug("Setting vtag %x for dir %d\n",
+				 ih->init_tag, !CTINFO2DIR(ctinfo));
 			conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
 		}
 
@@ -418,7 +392,7 @@ static int sctp_packet(struct nf_conn *conntrack,
 	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
 		&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
 		&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
-		DEBUGP("Setting assured bit\n");
+		pr_debug("Setting assured bit\n");
 		set_bit(IPS_ASSURED_BIT, &conntrack->status);
 		nf_conntrack_event_cache(IPCT_STATUS, skb);
 	}
@@ -436,9 +410,6 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 	u_int32_t offset, count;
 	char map[256 / sizeof (char)] = {0};
 
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
 	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
 	if (sh == NULL)
 		return 0;
@@ -461,7 +432,7 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 
 		/* Invalid: delete conntrack */
 		if (newconntrack == SCTP_CONNTRACK_MAX) {
-			DEBUGP("nf_conntrack_sctp: invalid new deleting.\n");
+			pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
 			return 0;
 		}
 
@@ -475,8 +446,8 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 				if (ih == NULL)
 					return 0;
 
-				DEBUGP("Setting vtag %x for new conn\n",
-					ih->init_tag);
+				pr_debug("Setting vtag %x for new conn\n",
+					 ih->init_tag);
 
 				conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
 								ih->init_tag;
@@ -488,8 +459,8 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 		/* If it is a shutdown ack OOTB packet, we expect a return
 		   shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
 		else {
-			DEBUGP("Setting vtag %x for new conn OOTB\n",
-				sh->vtag);
+			pr_debug("Setting vtag %x for new conn OOTB\n",
+				 sh->vtag);
 			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
 		}
 
@@ -688,8 +659,6 @@ int __init nf_conntrack_proto_sctp_init(void)
  cleanup_sctp4:
 	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
  out:
-	DEBUGP("SCTP conntrack module loading %s\n",
-					ret ? "failed": "succeeded");
 	return ret;
 }
 
@@ -697,7 +666,6 @@ void __exit nf_conntrack_proto_sctp_fini(void)
 {
 	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
 	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
-	DEBUGP("SCTP conntrack module unloaded\n");
 }
 
 module_init(nf_conntrack_proto_sctp_init);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index ccdd5d2..1c8206e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -26,13 +26,6 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 
-#if 0
-#define DEBUGP printk
-#define DEBUGP_VARS
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Protects conntrack->proto.tcp */
 static DEFINE_RWLOCK(tcp_lock);
 
@@ -496,7 +489,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 	}
 }
 
-static int tcp_in_window(struct ip_ct_tcp *state,
+static int tcp_in_window(struct nf_conn *ct,
+			 struct ip_ct_tcp *state,
 			 enum ip_conntrack_dir dir,
 			 unsigned int index,
 			 const struct sk_buff *skb,
@@ -506,6 +500,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 {
 	struct ip_ct_tcp_state *sender = &state->seen[dir];
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
+	struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 	__u32 seq, ack, sack, end, win, swin;
 	int res;
 
@@ -520,18 +515,17 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
 		tcp_sack(skb, dataoff, tcph, &sack);
 
-	DEBUGP("tcp_in_window: START\n");
-	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "seq=%u ack=%u sack=%u win=%u end=%u\n",
-		NIPQUAD(iph->saddr), ntohs(tcph->source),
-		NIPQUAD(iph->daddr), ntohs(tcph->dest),
-		seq, ack, sack, win, end);
-	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
+	pr_debug("tcp_in_window: START\n");
+	pr_debug("tcp_in_window: ");
+	NF_CT_DUMP_TUPLE(tuple);
+	pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n",
+		 seq, ack, sack, win, end);
+	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		 sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 sender->td_scale,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		 receiver->td_scale);
 
 	if (sender->td_end == 0) {
 		/*
@@ -609,23 +603,22 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 		 */
 		seq = end = sender->td_end;
 
-	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "seq=%u ack=%u sack =%u win=%u end=%u\n",
-		NIPQUAD(iph->saddr), ntohs(tcph->source),
-		NIPQUAD(iph->daddr), ntohs(tcph->dest),
-		seq, ack, sack, win, end);
-	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
-
-	DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
-		before(seq, sender->td_maxend + 1),
-		after(end, sender->td_end - receiver->td_maxwin - 1),
-		before(sack, receiver->td_end + 1),
-		after(ack, receiver->td_end - MAXACKWINDOW(sender)));
+	pr_debug("tcp_in_window: ");
+	NF_CT_DUMP_TUPLE(tuple);
+	pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n",
+		 seq, ack, sack, win, end);
+	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		 sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 sender->td_scale,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		 receiver->td_scale);
+
+	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
+		 before(seq, sender->td_maxend + 1),
+		 after(end, sender->td_end - receiver->td_maxwin - 1),
+		 before(sack, receiver->td_end + 1),
+		 after(ack, receiver->td_end - MAXACKWINDOW(sender)));
 
 	if (before(seq, sender->td_maxend + 1) &&
 	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
@@ -694,10 +687,10 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 			: "SEQ is over the upper bound (over the window of the receiver)");
 	}
 
-	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
-	       "receiver end=%u maxend=%u maxwin=%u\n",
-		res, sender->td_end, sender->td_maxend, sender->td_maxwin,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
+	pr_debug("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
+		 "receiver end=%u maxend=%u maxwin=%u\n",
+		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
 
 	return res;
 }
@@ -711,11 +704,9 @@ void nf_conntrack_tcp_update(struct sk_buff *skb,
 			     int dir)
 {
 	struct tcphdr *tcph = (void *)skb->data + dataoff;
-	__u32 end;
-#ifdef DEBUGP_VARS
 	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
 	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
-#endif
+	__u32 end;
 
 	end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
 
@@ -727,12 +718,12 @@ void nf_conntrack_tcp_update(struct sk_buff *skb,
 		conntrack->proto.tcp.seen[dir].td_end = end;
 	conntrack->proto.tcp.last_end = end;
 	write_unlock_bh(&tcp_lock);
-	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
+	pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
+		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		 sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 sender->td_scale,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		 receiver->td_scale);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
 #endif
@@ -823,6 +814,7 @@ static int tcp_packet(struct nf_conn *conntrack,
 		      int pf,
 		      unsigned int hooknum)
 {
+	struct nf_conntrack_tuple *tuple;
 	enum tcp_conntrack new_state, old_state;
 	enum ip_conntrack_dir dir;
 	struct tcphdr *th, _tcph;
@@ -837,6 +829,7 @@ static int tcp_packet(struct nf_conn *conntrack,
 	dir = CTINFO2DIR(ctinfo);
 	index = get_conntrack_index(th);
 	new_state = tcp_conntracks[dir][index][old_state];
+	tuple = &conntrack->tuplehash[dir].tuple;
 
 	switch (new_state) {
 	case TCP_CONNTRACK_IGNORE:
@@ -880,9 +873,8 @@ static int tcp_packet(struct nf_conn *conntrack,
 		return NF_ACCEPT;
 	case TCP_CONNTRACK_MAX:
 		/* Invalid packet */
-		DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
-		       dir, get_conntrack_index(th),
-		       old_state);
+		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
+			 dir, get_conntrack_index(th), old_state);
 		write_unlock_bh(&tcp_lock);
 		if (LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
@@ -933,7 +925,7 @@ static int tcp_packet(struct nf_conn *conntrack,
 		break;
 	}
 
-	if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
+	if (!tcp_in_window(conntrack, &conntrack->proto.tcp, dir, index,
 			   skb, dataoff, th, pf)) {
 		write_unlock_bh(&tcp_lock);
 		return -NF_ACCEPT;
@@ -942,13 +934,12 @@ static int tcp_packet(struct nf_conn *conntrack,
 	/* From now on we have got in-window packets */
 	conntrack->proto.tcp.last_index = index;
 
-	DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
-		NIPQUAD(iph->saddr), ntohs(th->source),
-		NIPQUAD(iph->daddr), ntohs(th->dest),
-		(th->syn ? 1 : 0), (th->ack ? 1 : 0),
-		(th->fin ? 1 : 0), (th->rst ? 1 : 0),
-		old_state, new_state);
+	pr_debug("tcp_conntracks: ");
+	NF_CT_DUMP_TUPLE(tuple);
+	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
+		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
+		 old_state, new_state);
 
 	conntrack->proto.tcp.state = new_state;
 	if (old_state != new_state
@@ -997,10 +988,8 @@ static int tcp_new(struct nf_conn *conntrack,
 {
 	enum tcp_conntrack new_state;
 	struct tcphdr *th, _tcph;
-#ifdef DEBUGP_VARS
 	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
 	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
-#endif
 
 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 	BUG_ON(th == NULL);
@@ -1012,7 +1001,7 @@ static int tcp_new(struct nf_conn *conntrack,
 
 	/* Invalid: delete conntrack */
 	if (new_state >= TCP_CONNTRACK_MAX) {
-		DEBUGP("nf_ct_tcp: invalid new deleting.\n");
+		pr_debug("nf_ct_tcp: invalid new deleting.\n");
 		return 0;
 	}
 
@@ -1065,12 +1054,12 @@ static int tcp_new(struct nf_conn *conntrack,
 	conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
 	conntrack->proto.tcp.last_index = TCP_NONE_SET;
 
-	DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
+	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
+		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		 sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 sender->td_scale,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		 receiver->td_scale);
 	return 1;
 }
 
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 627eda7..355d371 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -40,12 +40,6 @@ static u_int16_t ports[MAX_PORTS];
 static unsigned int ports_c;
 module_param_array(ports, ushort, &ports_c, 0400);
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 struct sane_request {
 	__be32 RPC_code;
 #define SANE_NET_START      7   /* RPC code */
@@ -125,15 +119,15 @@ static int help(struct sk_buff **pskb,
 	ct_sane_info->state = SANE_STATE_NORMAL;
 
 	if (datalen < sizeof(struct sane_reply_net_start)) {
-		DEBUGP("nf_ct_sane: NET_START reply too short\n");
+		pr_debug("nf_ct_sane: NET_START reply too short\n");
 		goto out;
 	}
 
 	reply = (struct sane_reply_net_start *)sb_ptr;
 	if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
 		/* saned refused the command */
-		DEBUGP("nf_ct_sane: unsuccessful SANE_STATUS = %u\n",
-			ntohl(reply->status));
+		pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n",
+			 ntohl(reply->status));
 		goto out;
 	}
 
@@ -151,9 +145,8 @@ static int help(struct sk_buff **pskb,
 	nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3,
 			  IPPROTO_TCP, NULL, &reply->port);
 
-	DEBUGP("nf_ct_sane: expect: ");
+	pr_debug("nf_ct_sane: expect: ");
 	NF_CT_DUMP_TUPLE(&exp->tuple);
-	NF_CT_DUMP_TUPLE(&exp->mask);
 
 	/* Can't expect this?  Best to drop packet now. */
 	if (nf_ct_expect_related(exp) != 0)
@@ -176,9 +169,9 @@ static void nf_conntrack_sane_fini(void)
 
 	for (i = 0; i < ports_c; i++) {
 		for (j = 0; j < 2; j++) {
-			DEBUGP("nf_ct_sane: unregistering helper for pf: %d "
-			       "port: %d\n",
-				sane[i][j].tuple.src.l3num, ports[i]);
+			pr_debug("nf_ct_sane: unregistering helper for pf: %d "
+				 "port: %d\n",
+				 sane[i][j].tuple.src.l3num, ports[i]);
 			nf_conntrack_helper_unregister(&sane[i][j]);
 		}
 	}
@@ -217,9 +210,9 @@ static int __init nf_conntrack_sane_init(void)
 				sprintf(tmpname, "sane-%d", ports[i]);
 			sane[i][j].name = tmpname;
 
-			DEBUGP("nf_ct_sane: registering helper for pf: %d "
-			       "port: %d\n",
-				sane[i][j].tuple.src.l3num, ports[i]);
+			pr_debug("nf_ct_sane: registering helper for pf: %d "
+				 "port: %d\n",
+				 sane[i][j].tuple.src.l3num, ports[i]);
 			ret = nf_conntrack_helper_register(&sane[i][j]);
 			if (ret) {
 				printk(KERN_ERR "nf_ct_sane: failed to "
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 5b78f0e..1276a44 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -21,12 +21,6 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <linux/netfilter/nf_conntrack_sip.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
 MODULE_DESCRIPTION("SIP connection tracking helper");
@@ -285,7 +279,7 @@ static int epaddr_len(struct nf_conn *ct, const char *dptr,
 	const char *aux = dptr;
 
 	if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
-		DEBUGP("ip: %s parse failed.!\n", dptr);
+		pr_debug("ip: %s parse failed.!\n", dptr);
 		return 0;
 	}
 
@@ -344,8 +338,8 @@ int ct_sip_get_info(struct nf_conn *ct,
 				    ct_sip_lnlen(dptr, limit),
 				    hnfo->case_sensitive);
 		if (!aux) {
-			DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str,
-			       hnfo->lname);
+			pr_debug("'%s' not found in '%s'.\n", hnfo->ln_str,
+				 hnfo->lname);
 			return -1;
 		}
 		aux += hnfo->ln_strlen;
@@ -356,11 +350,11 @@ int ct_sip_get_info(struct nf_conn *ct,
 
 		*matchoff = (aux - k) + shift;
 
-		DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname,
-		       *matchlen);
+		pr_debug("%s match succeeded! - len: %u\n", hnfo->lname,
+			 *matchlen);
 		return 1;
 	}
-	DEBUGP("%s header not found.\n", hnfo->lname);
+	pr_debug("%s header not found.\n", hnfo->lname);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ct_sip_get_info);
@@ -424,7 +418,7 @@ static int sip_help(struct sk_buff **pskb,
 	if (!skb_is_nonlinear(*pskb))
 		dptr = (*pskb)->data + dataoff;
 	else {
-		DEBUGP("Copy of skbuff not supported yet.\n");
+		pr_debug("Copy of skbuff not supported yet.\n");
 		goto out;
 	}
 
@@ -518,7 +512,7 @@ static int __init nf_conntrack_sip_init(void)
 				sprintf(tmpname, "sip-%u", i);
 			sip[i][j].name = tmpname;
 
-			DEBUGP("port #%u: %u\n", i, ports[i]);
+			pr_debug("port #%u: %u\n", i, ports[i]);
 
 			ret = nf_conntrack_helper_register(&sip[i][j]);
 			if (ret) {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 6af96c6..54498bc 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -25,12 +25,6 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 MODULE_LICENSE("GPL");
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index db0387c..cc19506 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -29,13 +29,6 @@ static int ports_c;
 module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
 
-#if 0
-#define DEBUGP(format, args...) printk("%s:%s:" format, \
-				       __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
 unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb,
 				 enum ip_conntrack_info ctinfo,
 				 struct nf_conntrack_expect *exp) __read_mostly;
@@ -62,7 +55,6 @@ static int tftp_help(struct sk_buff **pskb,
 	case TFTP_OPCODE_READ:
 	case TFTP_OPCODE_WRITE:
 		/* RRQ and WRQ works the same way */
-		DEBUGP("");
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
@@ -73,9 +65,8 @@ static int tftp_help(struct sk_buff **pskb,
 		nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3,
 				  IPPROTO_UDP, NULL, &tuple->dst.u.udp.port);
 
-		DEBUGP("expect: ");
+		pr_debug("expect: ");
 		NF_CT_DUMP_TUPLE(&exp->tuple);
-		NF_CT_DUMP_TUPLE(&exp->mask);
 
 		nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
 		if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
@@ -86,13 +77,13 @@ static int tftp_help(struct sk_buff **pskb,
 		break;
 	case TFTP_OPCODE_DATA:
 	case TFTP_OPCODE_ACK:
-		DEBUGP("Data/ACK opcode\n");
+		pr_debug("Data/ACK opcode\n");
 		break;
 	case TFTP_OPCODE_ERROR:
-		DEBUGP("Error opcode\n");
+		pr_debug("Error opcode\n");
 		break;
 	default:
-		DEBUGP("Unknown opcode\n");
+		pr_debug("Unknown opcode\n");
 	}
 	return ret;
 }
-- 
cgit v0.10.2


From ce7663d84a87bb4e1743f62950bf7dceed723a13 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:40:08 -0700
Subject: [NETFILTER]: nfnetlink_queue: don't unregister handler of other
 subsystem

The queue handlers registered by ip[6]_queue.ko at initialization should
not be unregistered at the request of a userland program using
nfnetlink_queue. If we allow that, there is no way to register the
handlers of the built-in ip[6]_queue again.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 10b5c62..0eed0b7 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -275,7 +275,8 @@ struct nf_queue_handler {
 };
 extern int nf_register_queue_handler(int pf, 
                                      struct nf_queue_handler *qh);
-extern int nf_unregister_queue_handler(int pf);
+extern int nf_unregister_queue_handler(int pf,
+				       struct nf_queue_handler *qh);
 extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh);
 extern void nf_reinject(struct sk_buff *skb,
 			struct nf_info *info,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index b1f2ace..f402894 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -44,12 +44,17 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
 EXPORT_SYMBOL(nf_register_queue_handler);
 
 /* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf)
+int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh)
 {
 	if (pf >= NPROTO)
 		return -EINVAL;
 
 	write_lock_bh(&queue_handler_lock);
+	if (queue_handler[pf] != qh) {
+		write_unlock_bh(&queue_handler_lock);
+		return -EINVAL;
+	}
+
 	queue_handler[pf] = NULL;
 	write_unlock_bh(&queue_handler_lock);
 
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7a97bec..7d47fc4 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -913,9 +913,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 		case NFQNL_CFG_CMD_PF_UNBIND:
 			QDEBUG("unregistering queue handler for pf=%u\n",
 				ntohs(cmd->pf));
-			/* This is a bug and a feature.  We can unregister
-			 * other handlers(!) */
-			ret = nf_unregister_queue_handler(ntohs(cmd->pf));
+			ret = nf_unregister_queue_handler(ntohs(cmd->pf), &nfqh);
 			break;
 		default:
 			ret = -EINVAL;
-- 
cgit v0.10.2


From 585426fdc5b4cccaacf0afc8cf821ff763750ae8 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:40:26 -0700
Subject: [NETFILTER]: nf_queue: Use RCU and mutex for queue handlers

Queue handlers are registered/unregistered only in process context, so
updates can be serialized with a mutex while readers use RCU.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index f402894..823fbf4 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -17,7 +17,7 @@
  */
 static struct nf_queue_handler *queue_handler[NPROTO];
 
-static DEFINE_RWLOCK(queue_handler_lock);
+static DEFINE_MUTEX(queue_handler_mutex);
 
 /* return EBUSY when somebody else is registered, return EEXIST if the
  * same handler is registered, return 0 in case of success. */
@@ -28,16 +28,16 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
 	if (pf >= NPROTO)
 		return -EINVAL;
 
-	write_lock_bh(&queue_handler_lock);
+	mutex_lock(&queue_handler_mutex);
 	if (queue_handler[pf] == qh)
 		ret = -EEXIST;
 	else if (queue_handler[pf])
 		ret = -EBUSY;
 	else {
-		queue_handler[pf] = qh;
+		rcu_assign_pointer(queue_handler[pf], qh);
 		ret = 0;
 	}
-	write_unlock_bh(&queue_handler_lock);
+	mutex_unlock(&queue_handler_mutex);
 
 	return ret;
 }
@@ -49,14 +49,16 @@ int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh)
 	if (pf >= NPROTO)
 		return -EINVAL;
 
-	write_lock_bh(&queue_handler_lock);
+	mutex_lock(&queue_handler_mutex);
 	if (queue_handler[pf] != qh) {
-		write_unlock_bh(&queue_handler_lock);
+		mutex_unlock(&queue_handler_mutex);
 		return -EINVAL;
 	}
 
-	queue_handler[pf] = NULL;
-	write_unlock_bh(&queue_handler_lock);
+	rcu_assign_pointer(queue_handler[pf], NULL);
+	mutex_unlock(&queue_handler_mutex);
+
+	synchronize_rcu();
 
 	return 0;
 }
@@ -66,12 +68,14 @@ void nf_unregister_queue_handlers(struct nf_queue_handler *qh)
 {
 	int pf;
 
-	write_lock_bh(&queue_handler_lock);
+	mutex_lock(&queue_handler_mutex);
 	for (pf = 0; pf < NPROTO; pf++)  {
 		if (queue_handler[pf] == qh)
-			queue_handler[pf] = NULL;
+			rcu_assign_pointer(queue_handler[pf], NULL);
 	}
-	write_unlock_bh(&queue_handler_lock);
+	mutex_unlock(&queue_handler_mutex);
+
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
 
@@ -94,18 +98,21 @@ static int __nf_queue(struct sk_buff *skb,
 	struct net_device *physoutdev = NULL;
 #endif
 	struct nf_afinfo *afinfo;
+	struct nf_queue_handler *qh;
 
 	/* QUEUE == DROP if noone is waiting, to be safe. */
-	read_lock(&queue_handler_lock);
-	if (!queue_handler[pf]) {
-		read_unlock(&queue_handler_lock);
+	rcu_read_lock();
+
+	qh = rcu_dereference(queue_handler[pf]);
+	if (!qh) {
+		rcu_read_unlock();
 		kfree_skb(skb);
 		return 1;
 	}
 
 	afinfo = nf_get_afinfo(pf);
 	if (!afinfo) {
-		read_unlock(&queue_handler_lock);
+		rcu_read_unlock();
 		kfree_skb(skb);
 		return 1;
 	}
@@ -115,7 +122,7 @@ static int __nf_queue(struct sk_buff *skb,
 		if (net_ratelimit())
 			printk(KERN_ERR "OOM queueing packet %p\n",
 			       skb);
-		read_unlock(&queue_handler_lock);
+		rcu_read_unlock();
 		kfree_skb(skb);
 		return 1;
 	}
@@ -125,7 +132,7 @@ static int __nf_queue(struct sk_buff *skb,
 
 	/* If it's going away, ignore hook. */
 	if (!try_module_get(info->elem->owner)) {
-		read_unlock(&queue_handler_lock);
+		rcu_read_unlock();
 		kfree(info);
 		return 0;
 	}
@@ -143,10 +150,9 @@ static int __nf_queue(struct sk_buff *skb,
 	}
 #endif
 	afinfo->saveroute(skb, info);
-	status = queue_handler[pf]->outfn(skb, info, queuenum,
-					  queue_handler[pf]->data);
+	status = qh->outfn(skb, info, queuenum, qh->data);
 
-	read_unlock(&queue_handler_lock);
+	rcu_read_unlock();
 
 	if (status < 0) {
 		/* James M doesn't say fuck enough. */
@@ -313,13 +319,13 @@ static int seq_show(struct seq_file *s, void *v)
 	loff_t *pos = v;
 	struct nf_queue_handler *qh;
 
-	read_lock_bh(&queue_handler_lock);
-	qh = queue_handler[*pos];
+	rcu_read_lock();
+	qh = rcu_dereference(queue_handler[*pos]);
 	if (!qh)
 		ret = seq_printf(s, "%2lld NONE\n", *pos);
 	else
 		ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
-	read_unlock_bh(&queue_handler_lock);
+	rcu_read_unlock();
 
 	return ret;
 }
-- 
cgit v0.10.2


From 5faf41535214b68c989a22302c8870f8758cbb8c Mon Sep 17 00:00:00 2001
From: Balazs Scheidler <bazsi@balabit.hu>
Date: Sat, 7 Jul 2007 22:41:01 -0700
Subject: [NETFILTER]: x_tables: add more detail to error message about
 match/target mask mismatch

Signed-off-by: Balazs Scheidler <bazsi@balabit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 0eb2504..520eddf 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -320,8 +320,8 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
 		return -EINVAL;
 	}
 	if (match->hooks && (hook_mask & ~match->hooks) != 0) {
-		printk("%s_tables: %s match: bad hook_mask %u\n",
-		       xt_prefix[family], match->name, hook_mask);
+		printk("%s_tables: %s match: bad hook_mask %u/%u\n",
+		       xt_prefix[family], match->name, hook_mask, match->hooks);
 		return -EINVAL;
 	}
 	if (match->proto && (match->proto != proto || inv_proto)) {
@@ -410,8 +410,9 @@ int xt_check_target(const struct xt_target *target, unsigned short family,
 		return -EINVAL;
 	}
 	if (target->hooks && (hook_mask & ~target->hooks) != 0) {
-		printk("%s_tables: %s target: bad hook_mask %u\n",
-		       xt_prefix[family], target->name, hook_mask);
+		printk("%s_tables: %s target: bad hook_mask %u/%u\n",
+		       xt_prefix[family], target->name, hook_mask,
+		       target->hooks);
 		return -EINVAL;
 	}
 	if (target->proto && (target->proto != proto || inv_proto)) {
-- 
cgit v0.10.2


From deaf391b4cc9d9f8e2b2793ebd56da776b54197a Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 7 Jul 2007 22:48:00 -0700
Subject: [BNX2]: Add bnx2_set_default_link().

Move the existing code that sets up the default link settings into this
new function.  This makes it easier to support the remote PHY feature in
the next few patches.

Also change ETHTOOL_ALL_FIBRE_SPEED to include 2500Mbps if supported.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 0f4f76f..56bc41e 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -1323,7 +1323,9 @@ bnx2_setup_serdes_phy(struct bnx2 *bp)
 }
 
 #define ETHTOOL_ALL_FIBRE_SPEED						\
-	(ADVERTISED_1000baseT_Full)
+	(bp->phy_flags & PHY_2_5G_CAPABLE_FLAG) ?			\
+		(ADVERTISED_2500baseX_Full | ADVERTISED_1000baseT_Full) :\
+		(ADVERTISED_1000baseT_Full)
 
 #define ETHTOOL_ALL_COPPER_SPEED					\
 	(ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |		\
@@ -1335,6 +1337,27 @@ bnx2_setup_serdes_phy(struct bnx2 *bp)
 
 #define PHY_ALL_1000_SPEED (ADVERTISE_1000HALF | ADVERTISE_1000FULL)
 
+static void
+bnx2_set_default_link(struct bnx2 *bp)
+{
+	bp->autoneg = AUTONEG_SPEED | AUTONEG_FLOW_CTRL;
+	bp->req_line_speed = 0;
+	if (bp->phy_flags & PHY_SERDES_FLAG) {
+		u32 reg;
+
+		bp->advertising = ETHTOOL_ALL_FIBRE_SPEED | ADVERTISED_Autoneg;
+
+		reg = REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_HW_CFG_CONFIG);
+		reg &= BNX2_PORT_HW_CFG_CFG_DFLT_LINK_MASK;
+		if (reg == BNX2_PORT_HW_CFG_CFG_DFLT_LINK_1G) {
+			bp->autoneg = 0;
+			bp->req_line_speed = bp->line_speed = SPEED_1000;
+			bp->req_duplex = DUPLEX_FULL;
+		}
+	} else
+		bp->advertising = ETHTOOL_ALL_COPPER_SPEED | ADVERTISED_Autoneg;
+}
+
 static int
 bnx2_setup_copper_phy(struct bnx2 *bp)
 {
@@ -6374,23 +6397,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 		}
 	}
 
-	bp->autoneg = AUTONEG_SPEED | AUTONEG_FLOW_CTRL;
-	bp->req_line_speed = 0;
-	if (bp->phy_flags & PHY_SERDES_FLAG) {
-		bp->advertising = ETHTOOL_ALL_FIBRE_SPEED | ADVERTISED_Autoneg;
-
-		reg = REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_HW_CFG_CONFIG);
-		reg &= BNX2_PORT_HW_CFG_CFG_DFLT_LINK_MASK;
-		if (reg == BNX2_PORT_HW_CFG_CFG_DFLT_LINK_1G) {
-			bp->autoneg = 0;
-			bp->req_line_speed = bp->line_speed = SPEED_1000;
-			bp->req_duplex = DUPLEX_FULL;
-		}
-	}
-	else {
-		bp->advertising = ETHTOOL_ALL_COPPER_SPEED | ADVERTISED_Autoneg;
-	}
-
+	bnx2_set_default_link(bp);
 	bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX;
 
 	init_timer(&bp->timer);
-- 
cgit v0.10.2


From 9700e6befea505b03b6e625536a9a106811e9a14 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 7 Jul 2007 22:48:31 -0700
Subject: [BNX2]: Add remote PHY bit definitions.

Add new fields in struct bnx2 and other bit definitions in shared
memory to support remote PHY.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h
index 49a5de2..14c0a1e 100644
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -6537,6 +6537,7 @@ struct bnx2 {
 #define PHY_INT_MODE_AUTO_POLLING_FLAG	0x100
 #define PHY_INT_MODE_LINK_READY_FLAG	0x200
 #define PHY_DIS_EARLY_DAC_FLAG		0x400
+#define REMOTE_PHY_CAP_FLAG		0x800
 
 	u32			mii_bmcr;
 	u32			mii_bmsr;
@@ -6625,6 +6626,7 @@ struct bnx2 {
 	u16			req_line_speed;
 	u8			req_duplex;
 
+	u8			phy_port;
 	u8			link_up;
 
 	u16			line_speed;
@@ -6770,7 +6772,7 @@ struct fw_info {
  * the firmware has timed out, the driver will assume there is no firmware
  * running and there won't be any firmware-driver synchronization during a
  * driver reset. */
-#define FW_ACK_TIME_OUT_MS                  100
+#define FW_ACK_TIME_OUT_MS                  1000
 
 
 #define BNX2_DRV_RESET_SIGNATURE		0x00000000
@@ -6788,6 +6790,7 @@ struct fw_info {
 #define BNX2_DRV_MSG_CODE_DIAG			 0x07000000
 #define BNX2_DRV_MSG_CODE_SUSPEND_NO_WOL	 0x09000000
 #define BNX2_DRV_MSG_CODE_UNLOAD_LNK_DN		 0x0b000000
+#define BNX2_DRV_MSG_CODE_CMD_SET_LINK		 0x10000000
 
 #define BNX2_DRV_MSG_DATA			 0x00ff0000
 #define BNX2_DRV_MSG_DATA_WAIT0			 0x00010000
@@ -6845,6 +6848,30 @@ struct fw_info {
  * This is used for debugging. */
 #define BNX2_DRV_MSG_DATA_PULSE_CODE_ALWAYS_ALIVE	 0x00080000
 
+#define BNX2_DRV_MB_ARG0			0x00000014
+#define BNX2_NETLINK_SET_LINK_SPEED_10HALF	 (1<<0)
+#define BNX2_NETLINK_SET_LINK_SPEED_10FULL	 (1<<1)
+#define BNX2_NETLINK_SET_LINK_SPEED_10		 \
+	(BNX2_NETLINK_SET_LINK_SPEED_10HALF |	 \
+	 BNX2_NETLINK_SET_LINK_SPEED_10FULL)
+#define BNX2_NETLINK_SET_LINK_SPEED_100HALF	 (1<<2)
+#define BNX2_NETLINK_SET_LINK_SPEED_100FULL	 (1<<3)
+#define BNX2_NETLINK_SET_LINK_SPEED_100		 \
+	(BNX2_NETLINK_SET_LINK_SPEED_100HALF |	 \
+	 BNX2_NETLINK_SET_LINK_SPEED_100FULL)
+#define BNX2_NETLINK_SET_LINK_SPEED_1GHALF	 (1<<4)
+#define BNX2_NETLINK_SET_LINK_SPEED_1GFULL	 (1<<5)
+#define BNX2_NETLINK_SET_LINK_SPEED_2G5HALF	 (1<<6)
+#define BNX2_NETLINK_SET_LINK_SPEED_2G5FULL	 (1<<7)
+#define BNX2_NETLINK_SET_LINK_SPEED_10GHALF	 (1<<8)
+#define BNX2_NETLINK_SET_LINK_SPEED_10GFULL	 (1<<9)
+#define BNX2_NETLINK_SET_LINK_ENABLE_AUTONEG	 (1<<10)
+#define BNX2_NETLINK_SET_LINK_PHY_APP_REMOTE	 (1<<11)
+#define BNX2_NETLINK_SET_LINK_FC_SYM_PAUSE	 (1<<12)
+#define BNX2_NETLINK_SET_LINK_FC_ASYM_PAUSE	 (1<<13)
+#define BNX2_NETLINK_SET_LINK_ETH_AT_WIRESPEED	 (1<<14)
+#define BNX2_NETLINK_SET_LINK_PHY_RESET		 (1<<15)
+
 #define BNX2_DEV_INFO_SIGNATURE			0x00000020
 #define BNX2_DEV_INFO_SIGNATURE_MAGIC		 0x44564900
 #define BNX2_DEV_INFO_SIGNATURE_MAGIC_MASK	 0xffffff00
@@ -7065,6 +7092,28 @@ struct fw_info {
 #define BNX2_BC_STATE_BC_DBG_CMD_LOOP_CNT_MASK	 0xffff
 #define BNX2_BC_STATE_BC_DBG_CMD_LOOP_INFINITE	 0xffff
 
+#define BNX2_FW_EVT_CODE_MB			0x354
+#define BNX2_FW_EVT_CODE_SW_TIMER_EXPIRATION_EVENT 0x00000000
+#define BNX2_FW_EVT_CODE_LINK_EVENT		 0x00000001
+
+#define BNX2_DRV_ACK_CAP_MB			0x364
+#define BNX2_DRV_ACK_CAP_SIGNATURE		 0x35450000
+#define BNX2_CAPABILITY_SIGNATURE_MASK		 0xFFFF0000
+
+#define BNX2_FW_CAP_MB				0x368
+#define BNX2_FW_CAP_SIGNATURE			 0xaa550000
+#define BNX2_FW_ACK_DRV_SIGNATURE		 0x52500000
+#define BNX2_FW_CAP_SIGNATURE_MASK		 0xffff0000
+#define BNX2_FW_CAP_REMOTE_PHY_CAPABLE		 0x00000001
+#define BNX2_FW_CAP_REMOTE_PHY_PRESENT		 0x00000002
+
+#define BNX2_RPHY_SIGNATURE			0x36c
+#define BNX2_RPHY_LOAD_SIGNATURE		 0x5a5a5a5a
+
+#define BNX2_RPHY_FLAGS				0x370
+#define BNX2_RPHY_SERDES_LINK			0x374
+#define BNX2_RPHY_COPPER_LINK			0x378
+
 #define HOST_VIEW_SHMEM_BASE			0x167c00
 
 #endif
-- 
cgit v0.10.2


From 0d8a6571051d23c214d7a316976138a6fd8bda1c Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 7 Jul 2007 22:49:43 -0700
Subject: [BNX2]: Add support for remote PHY.

In blade servers, the Serdes PHY in 5708S can control the remote
copper PHY through autonegotiation on the backplane.  This patch adds
the logic to interface with the firmware to control the remote PHY
autonegotiation and to handle remote PHY link events.

When remote PHY is present, the 5708S Serdes device practically
becomes a copper device with full control over the 1000Base-T
link settings.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 56bc41e..c571da6 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -550,6 +550,9 @@ bnx2_report_fw_link(struct bnx2 *bp)
 {
 	u32 fw_link_status = 0;
 
+	if (bp->phy_flags & REMOTE_PHY_CAP_FLAG)
+		return;
+
 	if (bp->link_up) {
 		u32 bmsr;
 
@@ -1100,6 +1103,9 @@ bnx2_set_link(struct bnx2 *bp)
 		return 0;
 	}
 
+	if (bp->phy_flags & REMOTE_PHY_CAP_FLAG)
+		return 0;
+
 	link_up = bp->link_up;
 
 	bnx2_enable_bmsr1(bp);
@@ -1210,12 +1216,74 @@ bnx2_phy_get_pause_adv(struct bnx2 *bp)
 	return adv;
 }
 
+static int bnx2_fw_sync(struct bnx2 *, u32, int);
+
 static int
-bnx2_setup_serdes_phy(struct bnx2 *bp)
+bnx2_setup_remote_phy(struct bnx2 *bp, u8 port)
+{
+	u32 speed_arg = 0, pause_adv;
+
+	pause_adv = bnx2_phy_get_pause_adv(bp);
+
+	if (bp->autoneg & AUTONEG_SPEED) {
+		speed_arg |= BNX2_NETLINK_SET_LINK_ENABLE_AUTONEG;
+		if (bp->advertising & ADVERTISED_10baseT_Half)
+			speed_arg |= BNX2_NETLINK_SET_LINK_SPEED_10HALF;
+		if (bp->advertising & ADVERTISED_10baseT_Full)
+			speed_arg |= BNX2_NETLINK_SET_LINK_SPEED_10FULL;
+		if (bp->advertising & ADVERTISED_100baseT_Half)
+			speed_arg |= BNX2_NETLINK_SET_LINK_SPEED_100HALF;
+		if (bp->advertising & ADVERTISED_100baseT_Full)
+			speed_arg |= BNX2_NETLINK_SET_LINK_SPEED_100FULL;
+		if (bp->advertising & ADVERTISED_1000baseT_Full)
+			speed_arg |= BNX2_NETLINK_SET_LINK_SPEED_1GFULL;
+		if (bp->advertising & ADVERTISED_2500baseX_Full)
+			speed_arg |= BNX2_NETLINK_SET_LINK_SPEED_2G5FULL;
+	} else {
+		if (bp->req_line_speed == SPEED_2500)
+			speed_arg = BNX2_NETLINK_SET_LINK_SPEED_2G5FULL;
+		else if (bp->req_line_speed == SPEED_1000)
+			speed_arg = BNX2_NETLINK_SET_LINK_SPEED_1GFULL;
+		else if (bp->req_line_speed == SPEED_100) {
+			if (bp->req_duplex == DUPLEX_FULL)
+				speed_arg = BNX2_NETLINK_SET_LINK_SPEED_100FULL;
+			else
+				speed_arg = BNX2_NETLINK_SET_LINK_SPEED_100HALF;
+		} else if (bp->req_line_speed == SPEED_10) {
+			if (bp->req_duplex == DUPLEX_FULL)
+				speed_arg = BNX2_NETLINK_SET_LINK_SPEED_10FULL;
+			else
+				speed_arg = BNX2_NETLINK_SET_LINK_SPEED_10HALF;
+		}
+	}
+	/* Check the 1000Base-X and the copper variant of each pause bit. */
+	if (pause_adv & (ADVERTISE_1000XPAUSE | ADVERTISE_PAUSE_CAP))
+		speed_arg |= BNX2_NETLINK_SET_LINK_FC_SYM_PAUSE;
+	if (pause_adv & (ADVERTISE_1000XPSE_ASYM | ADVERTISE_PAUSE_ASYM))
+		speed_arg |= BNX2_NETLINK_SET_LINK_FC_ASYM_PAUSE;
+
+	if (port == PORT_TP)
+		speed_arg |= BNX2_NETLINK_SET_LINK_PHY_APP_REMOTE |
+			     BNX2_NETLINK_SET_LINK_ETH_AT_WIRESPEED;
+
+	REG_WR_IND(bp, bp->shmem_base + BNX2_DRV_MB_ARG0, speed_arg);
+	/* phy_lock is dropped around the firmware handshake below. */
+	spin_unlock_bh(&bp->phy_lock);
+	bnx2_fw_sync(bp, BNX2_DRV_MSG_CODE_CMD_SET_LINK, 0);
+	spin_lock_bh(&bp->phy_lock);
+
+	return 0;
+}
+
+static int
+bnx2_setup_serdes_phy(struct bnx2 *bp, u8 port)
 {
 	u32 adv, bmcr;
 	u32 new_adv = 0;
 
+	if (bp->phy_flags & REMOTE_PHY_CAP_FLAG)
+		return (bnx2_setup_remote_phy(bp, port));
+
 	if (!(bp->autoneg & AUTONEG_SPEED)) {
 		u32 new_bmcr;
 		int force_link_down = 0;
@@ -1338,8 +1406,58 @@ bnx2_setup_serdes_phy(struct bnx2 *bp)
 #define PHY_ALL_1000_SPEED (ADVERTISE_1000HALF | ADVERTISE_1000FULL)
 
 static void
+bnx2_set_default_remote_link(struct bnx2 *bp)
+{
+	u32 link;
+
+	if (bp->phy_port == PORT_TP)
+		link = REG_RD_IND(bp, bp->shmem_base + BNX2_RPHY_COPPER_LINK);
+	else
+		link = REG_RD_IND(bp, bp->shmem_base + BNX2_RPHY_SERDES_LINK);
+
+	if (link & BNX2_NETLINK_SET_LINK_ENABLE_AUTONEG) {
+		bp->req_line_speed = 0;
+		bp->autoneg |= AUTONEG_SPEED;
+		bp->advertising = ADVERTISED_Autoneg;
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_10HALF)
+			bp->advertising |= ADVERTISED_10baseT_Half;
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_10FULL)
+			bp->advertising |= ADVERTISED_10baseT_Full;
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_100HALF)
+			bp->advertising |= ADVERTISED_100baseT_Half;
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_100FULL)
+			bp->advertising |= ADVERTISED_100baseT_Full;
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_1GFULL)
+			bp->advertising |= ADVERTISED_1000baseT_Full;
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_2G5FULL)
+			bp->advertising |= ADVERTISED_2500baseX_Full;
+	} else {
+		bp->autoneg = 0;
+		bp->advertising = 0;
+		bp->req_duplex = DUPLEX_FULL;
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_10) {
+			bp->req_line_speed = SPEED_10;
+			if (link & BNX2_NETLINK_SET_LINK_SPEED_10HALF)
+				bp->req_duplex = DUPLEX_HALF;
+		}
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_100) {
+			bp->req_line_speed = SPEED_100;
+			if (link & BNX2_NETLINK_SET_LINK_SPEED_100HALF)
+				bp->req_duplex = DUPLEX_HALF;
+		}
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_1GFULL)
+			bp->req_line_speed = SPEED_1000;
+		if (link & BNX2_NETLINK_SET_LINK_SPEED_2G5FULL)
+			bp->req_line_speed = SPEED_2500;
+	}
+}
+
+static void
 bnx2_set_default_link(struct bnx2 *bp)
 {
+	if (bp->phy_flags & REMOTE_PHY_CAP_FLAG)
+		return bnx2_set_default_remote_link(bp);
+
 	bp->autoneg = AUTONEG_SPEED | AUTONEG_FLOW_CTRL;
 	bp->req_line_speed = 0;
 	if (bp->phy_flags & PHY_SERDES_FLAG) {
@@ -1358,6 +1476,97 @@ bnx2_set_default_link(struct bnx2 *bp)
 		bp->advertising = ETHTOOL_ALL_COPPER_SPEED | ADVERTISED_Autoneg;
 }
 
+static void
+bnx2_remote_phy_event(struct bnx2 *bp)
+{
+	u32 msg;
+	u8 link_up = bp->link_up;
+	u8 old_port;
+
+	msg = REG_RD_IND(bp, bp->shmem_base + BNX2_LINK_STATUS);
+
+	if ((msg & BNX2_LINK_STATUS_LINK_UP) == BNX2_LINK_STATUS_LINK_DOWN)
+		bp->link_up = 0;
+	else {
+		u32 speed;
+
+		bp->link_up = 1;
+		speed = msg & BNX2_LINK_STATUS_SPEED_MASK;
+		bp->duplex = DUPLEX_FULL;
+		switch (speed) {
+			case BNX2_LINK_STATUS_10HALF:
+				bp->duplex = DUPLEX_HALF;
+			case BNX2_LINK_STATUS_10FULL:
+				bp->line_speed = SPEED_10;
+				break;
+			case BNX2_LINK_STATUS_100HALF:
+				bp->duplex = DUPLEX_HALF;
+			case BNX2_LINK_STATUS_100BASE_T4:
+			case BNX2_LINK_STATUS_100FULL:
+				bp->line_speed = SPEED_100;
+				break;
+			case BNX2_LINK_STATUS_1000HALF:
+				bp->duplex = DUPLEX_HALF;
+			case BNX2_LINK_STATUS_1000FULL:
+				bp->line_speed = SPEED_1000;
+				break;
+			case BNX2_LINK_STATUS_2500HALF:
+				bp->duplex = DUPLEX_HALF;
+			case BNX2_LINK_STATUS_2500FULL:
+				bp->line_speed = SPEED_2500;
+				break;
+			default:
+				bp->line_speed = 0;
+				break;
+		}
+
+		spin_lock(&bp->phy_lock);
+		bp->flow_ctrl = 0;
+		if ((bp->autoneg & (AUTONEG_SPEED | AUTONEG_FLOW_CTRL)) !=
+		    (AUTONEG_SPEED | AUTONEG_FLOW_CTRL)) {
+			if (bp->duplex == DUPLEX_FULL)
+				bp->flow_ctrl = bp->req_flow_ctrl;
+		} else {
+			if (msg & BNX2_LINK_STATUS_TX_FC_ENABLED)
+				bp->flow_ctrl |= FLOW_CTRL_TX;
+			if (msg & BNX2_LINK_STATUS_RX_FC_ENABLED)
+				bp->flow_ctrl |= FLOW_CTRL_RX;
+		}
+
+		old_port = bp->phy_port;
+		if (msg & BNX2_LINK_STATUS_SERDES_LINK)
+			bp->phy_port = PORT_FIBRE;
+		else
+			bp->phy_port = PORT_TP;
+
+		if (old_port != bp->phy_port)
+			bnx2_set_default_link(bp);
+
+		spin_unlock(&bp->phy_lock);
+	}
+	if (bp->link_up != link_up)
+		bnx2_report_link(bp);
+
+	bnx2_set_mac_link(bp);
+}
+
+static int
+bnx2_set_remote_link(struct bnx2 *bp)
+{
+	u32 evt_code;
+
+	evt_code = REG_RD_IND(bp, bp->shmem_base + BNX2_FW_EVT_CODE_MB);
+	switch (evt_code) {
+		case BNX2_FW_EVT_CODE_LINK_EVENT:
+			bnx2_remote_phy_event(bp);
+			break;
+		case BNX2_FW_EVT_CODE_SW_TIMER_EXPIRATION_EVENT:
+		default:
+			break;
+	}
+	return 0;
+}
+
 static int
 bnx2_setup_copper_phy(struct bnx2 *bp)
 {
@@ -1456,13 +1665,13 @@ bnx2_setup_copper_phy(struct bnx2 *bp)
 }
 
 static int
-bnx2_setup_phy(struct bnx2 *bp)
+bnx2_setup_phy(struct bnx2 *bp, u8 port)
 {
 	if (bp->loopback == MAC_LOOPBACK)
 		return 0;
 
 	if (bp->phy_flags & PHY_SERDES_FLAG) {
-		return (bnx2_setup_serdes_phy(bp));
+		return (bnx2_setup_serdes_phy(bp, port));
 	}
 	else {
 		return (bnx2_setup_copper_phy(bp));
@@ -1682,6 +1891,9 @@ bnx2_init_phy(struct bnx2 *bp)
 
         REG_WR(bp, BNX2_EMAC_ATTENTION_ENA, BNX2_EMAC_ATTENTION_ENA_LINK);
 
+	if (bp->phy_flags & REMOTE_PHY_CAP_FLAG)
+		goto setup_phy;
+
 	bnx2_read_phy(bp, MII_PHYSID1, &val);
 	bp->phy_id = val << 16;
 	bnx2_read_phy(bp, MII_PHYSID2, &val);
@@ -1699,7 +1911,9 @@ bnx2_init_phy(struct bnx2 *bp)
 		rc = bnx2_init_copper_phy(bp);
 	}
 
-	bnx2_setup_phy(bp);
+setup_phy:
+	if (!rc)
+		rc = bnx2_setup_phy(bp, bp->phy_port);
 
 	return rc;
 }
@@ -2007,6 +2221,9 @@ bnx2_phy_int(struct bnx2 *bp)
 		bnx2_set_link(bp);
 		spin_unlock(&bp->phy_lock);
 	}
+	if (bnx2_phy_event_is_set(bp, STATUS_ATTN_BITS_TIMER_ABORT))
+		bnx2_set_remote_link(bp);
+
 }
 
 static void
@@ -2345,7 +2562,8 @@ bnx2_interrupt(int irq, void *dev_instance)
 	return IRQ_HANDLED;
 }
 
-#define STATUS_ATTN_EVENTS	STATUS_ATTN_BITS_LINK_STATE
+#define STATUS_ATTN_EVENTS	(STATUS_ATTN_BITS_LINK_STATE | \
+				 STATUS_ATTN_BITS_TIMER_ABORT)
 
 static inline int
 bnx2_has_work(struct bnx2 *bp)
@@ -3585,6 +3803,36 @@ nvram_write_end:
 	return rc;
 }
 
+static void
+bnx2_init_remote_phy(struct bnx2 *bp)
+{
+	u32 val;
+
+	bp->phy_flags &= ~REMOTE_PHY_CAP_FLAG;
+	if (!(bp->phy_flags & PHY_SERDES_FLAG))
+		return;
+
+	val = REG_RD_IND(bp, bp->shmem_base + BNX2_FW_CAP_MB);
+	if ((val & BNX2_FW_CAP_SIGNATURE_MASK) != BNX2_FW_CAP_SIGNATURE)
+		return;
+
+	if (val & BNX2_FW_CAP_REMOTE_PHY_CAPABLE) {
+		if (netif_running(bp->dev)) {
+			val = BNX2_DRV_ACK_CAP_SIGNATURE |
+			      BNX2_FW_CAP_REMOTE_PHY_CAPABLE;
+			REG_WR_IND(bp, bp->shmem_base + BNX2_DRV_ACK_CAP_MB,
+				   val);
+		}
+		bp->phy_flags |= REMOTE_PHY_CAP_FLAG;
+
+		val = REG_RD_IND(bp, bp->shmem_base + BNX2_LINK_STATUS);
+		if (val & BNX2_LINK_STATUS_SERDES_LINK)
+			bp->phy_port = PORT_FIBRE;
+		else
+			bp->phy_port = PORT_TP;
+	}
+}
+
 static int
 bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
 {
@@ -3665,6 +3913,12 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
 	if (rc)
 		return rc;
 
+	spin_lock_bh(&bp->phy_lock);
+	bnx2_init_remote_phy(bp);
+	if (bp->phy_flags & REMOTE_PHY_CAP_FLAG)
+		bnx2_set_default_remote_link(bp);
+	spin_unlock_bh(&bp->phy_lock);
+
 	if (CHIP_ID(bp) == CHIP_ID_5706_A0) {
 		/* Adjust the voltage regular to two steps lower.  The default
 		 * of this register is 0x0000000e. */
@@ -4092,8 +4346,8 @@ bnx2_init_nic(struct bnx2 *bp)
 
 	spin_lock_bh(&bp->phy_lock);
 	bnx2_init_phy(bp);
-	spin_unlock_bh(&bp->phy_lock);
 	bnx2_set_link(bp);
+	spin_unlock_bh(&bp->phy_lock);
 	return 0;
 }
 
@@ -4623,6 +4877,9 @@ bnx2_5706_serdes_timer(struct bnx2 *bp)
 static void
 bnx2_5708_serdes_timer(struct bnx2 *bp)
 {
+	if (bp->phy_flags & REMOTE_PHY_CAP_FLAG)
+		return;
+
 	if ((bp->phy_flags & PHY_2_5G_CAPABLE_FLAG) == 0) {
 		bp->serdes_an_pending = 0;
 		return;
@@ -5222,7 +5479,7 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
 	spin_lock_bh(&bp->phy_lock);
 
-	bnx2_setup_phy(bp);
+	bnx2_setup_phy(bp, bp->phy_port);
 
 	spin_unlock_bh(&bp->phy_lock);
 
@@ -5566,7 +5823,7 @@ bnx2_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause)
 
 	spin_lock_bh(&bp->phy_lock);
 
-	bnx2_setup_phy(bp);
+	bnx2_setup_phy(bp, bp->phy_port);
 
 	spin_unlock_bh(&bp->phy_lock);
 
@@ -6338,7 +6595,9 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 	else if (CHIP_BOND_ID(bp) & CHIP_BOND_ID_SERDES_BIT)
 		bp->phy_flags |= PHY_SERDES_FLAG;
 
+	bp->phy_port = PORT_TP;
 	if (bp->phy_flags & PHY_SERDES_FLAG) {
+		bp->phy_port = PORT_FIBRE;
 		bp->flags |= NO_WOL_FLAG;
 		if (CHIP_NUM(bp) != CHIP_NUM_5706) {
 			bp->phy_addr = 2;
@@ -6347,6 +6606,8 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 			if (reg & BNX2_SHARED_HW_CFG_PHY_2_5G)
 				bp->phy_flags |= PHY_2_5G_CAPABLE_FLAG;
 		}
+		bnx2_init_remote_phy(bp);
+
 	} else if (CHIP_NUM(bp) == CHIP_NUM_5706 ||
 		   CHIP_NUM(bp) == CHIP_NUM_5708)
 		bp->phy_flags |= PHY_CRC_FIX_FLAG;
-- 
cgit v0.10.2


From 7b6b83474cb9bdd07dadfb7497a29c3005ad9d1d Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 7 Jul 2007 22:50:15 -0700
Subject: [BNX2]: Add ethtool support for remote PHY.

Modify the driver's ethtool_ops->get_settings and set_settings
functions to support remote PHY.  Users control the remote copper
PHY settings by specifying link settings for the tp (twisted pair)
port.

The nway_reset function is also modified to support remote PHY.
mii-tool operations are not supported on remote PHY and we will
return -EOPNOTSUPP.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index c571da6..d7fad69 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -5363,17 +5363,25 @@ static int
 bnx2_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct bnx2 *bp = netdev_priv(dev);
+	int support_serdes = 0, support_copper = 0;
 
 	cmd->supported = SUPPORTED_Autoneg;
-	if (bp->phy_flags & PHY_SERDES_FLAG) {
+	if (bp->phy_flags & REMOTE_PHY_CAP_FLAG) {
+		support_serdes = 1;
+		support_copper = 1;
+	} else if (bp->phy_port == PORT_FIBRE)
+		support_serdes = 1;
+	else
+		support_copper = 1;
+
+	if (support_serdes) {
 		cmd->supported |= SUPPORTED_1000baseT_Full |
 			SUPPORTED_FIBRE;
 		if (bp->phy_flags & PHY_2_5G_CAPABLE_FLAG)
 			cmd->supported |= SUPPORTED_2500baseX_Full;
 
-		cmd->port = PORT_FIBRE;
 	}
-	else {
+	if (support_copper) {
 		cmd->supported |= SUPPORTED_10baseT_Half |
 			SUPPORTED_10baseT_Full |
 			SUPPORTED_100baseT_Half |
@@ -5381,9 +5389,10 @@ bnx2_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 			SUPPORTED_1000baseT_Full |
 			SUPPORTED_TP;
 
-		cmd->port = PORT_TP;
 	}
 
+	spin_lock_bh(&bp->phy_lock);
+	cmd->port = bp->phy_port;
 	cmd->advertising = bp->advertising;
 
 	if (bp->autoneg & AUTONEG_SPEED) {
@@ -5401,6 +5410,7 @@ bnx2_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 		cmd->speed = -1;
 		cmd->duplex = -1;
 	}
+	spin_unlock_bh(&bp->phy_lock);
 
 	cmd->transceiver = XCVR_INTERNAL;
 	cmd->phy_address = bp->phy_addr;
@@ -5416,6 +5426,15 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	u8 req_duplex = bp->req_duplex;
 	u16 req_line_speed = bp->req_line_speed;
 	u32 advertising = bp->advertising;
+	int err = -EINVAL;
+
+	spin_lock_bh(&bp->phy_lock);
+
+	if (cmd->port != PORT_TP && cmd->port != PORT_FIBRE)
+		goto err_out_unlock;
+
+	if (cmd->port != bp->phy_port && !(bp->phy_flags & REMOTE_PHY_CAP_FLAG))
+		goto err_out_unlock;
 
 	if (cmd->autoneg == AUTONEG_ENABLE) {
 		autoneg |= AUTONEG_SPEED;
@@ -5428,44 +5447,41 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 			(cmd->advertising == ADVERTISED_100baseT_Half) ||
 			(cmd->advertising == ADVERTISED_100baseT_Full)) {
 
-			if (bp->phy_flags & PHY_SERDES_FLAG)
-				return -EINVAL;
+			if (cmd->port == PORT_FIBRE)
+				goto err_out_unlock;
 
 			advertising = cmd->advertising;
 
 		} else if (cmd->advertising == ADVERTISED_2500baseX_Full) {
-			if (!(bp->phy_flags & PHY_2_5G_CAPABLE_FLAG))
-				return -EINVAL;
-		} else if (cmd->advertising == ADVERTISED_1000baseT_Full) {
+			if (!(bp->phy_flags & PHY_2_5G_CAPABLE_FLAG) ||
+			    (cmd->port == PORT_TP))
+				goto err_out_unlock;
+		} else if (cmd->advertising == ADVERTISED_1000baseT_Full)
 			advertising = cmd->advertising;
-		}
-		else if (cmd->advertising == ADVERTISED_1000baseT_Half) {
-			return -EINVAL;
-		}
+		else if (cmd->advertising == ADVERTISED_1000baseT_Half)
+			goto err_out_unlock;
 		else {
-			if (bp->phy_flags & PHY_SERDES_FLAG) {
+			if (cmd->port == PORT_FIBRE)
 				advertising = ETHTOOL_ALL_FIBRE_SPEED;
-			}
-			else {
+			else
 				advertising = ETHTOOL_ALL_COPPER_SPEED;
-			}
 		}
 		advertising |= ADVERTISED_Autoneg;
 	}
 	else {
-		if (bp->phy_flags & PHY_SERDES_FLAG) {
+		if (cmd->port == PORT_FIBRE) {
 			if ((cmd->speed != SPEED_1000 &&
 			     cmd->speed != SPEED_2500) ||
 			    (cmd->duplex != DUPLEX_FULL))
-				return -EINVAL;
+				goto err_out_unlock;
 
 			if (cmd->speed == SPEED_2500 &&
 			    !(bp->phy_flags & PHY_2_5G_CAPABLE_FLAG))
-				return -EINVAL;
-		}
-		else if (cmd->speed == SPEED_1000) {
-			return -EINVAL;
+				goto err_out_unlock;
 		}
+		else if (cmd->speed == SPEED_1000 || cmd->speed == SPEED_2500)
+			goto err_out_unlock;
+
 		autoneg &= ~AUTONEG_SPEED;
 		req_line_speed = cmd->speed;
 		req_duplex = cmd->duplex;
@@ -5477,13 +5493,12 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	bp->req_line_speed = req_line_speed;
 	bp->req_duplex = req_duplex;
 
-	spin_lock_bh(&bp->phy_lock);
-
-	bnx2_setup_phy(bp, bp->phy_port);
+	err = bnx2_setup_phy(bp, cmd->port);
 
+err_out_unlock:
 	spin_unlock_bh(&bp->phy_lock);
 
-	return 0;
+	return err;
 }
 
 static void
@@ -5610,6 +5625,14 @@ bnx2_nway_reset(struct net_device *dev)
 
 	spin_lock_bh(&bp->phy_lock);
 
+	if (bp->phy_flags & REMOTE_PHY_CAP_FLAG) {
+		int rc;
+
+		rc = bnx2_setup_remote_phy(bp, bp->phy_port);
+		spin_unlock_bh(&bp->phy_lock);
+		return rc;
+	}
+
 	/* Force a link down visible on the other side */
 	if (bp->phy_flags & PHY_SERDES_FLAG) {
 		bnx2_write_phy(bp, bp->mii_bmcr, BMCR_LOOPBACK);
@@ -6219,6 +6242,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	case SIOCGMIIREG: {
 		u32 mii_regval;
 
+		if (bp->phy_flags & REMOTE_PHY_CAP_FLAG)
+			return -EOPNOTSUPP;
+
 		if (!netif_running(dev))
 			return -EAGAIN;
 
@@ -6235,6 +6261,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 
+		if (bp->phy_flags & REMOTE_PHY_CAP_FLAG)
+			return -EOPNOTSUPP;
+
 		if (!netif_running(dev))
 			return -EAGAIN;
 
-- 
cgit v0.10.2


From 9b1084b8f99a6b5e97c0c9bc1b4455db4fa51a07 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 7 Jul 2007 22:50:37 -0700
Subject: [BNX2]: Modify link up message.

Modify the link up dmesg to report remote copper or Serdes link.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index d7fad69..439f482 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -604,12 +604,21 @@ bnx2_report_fw_link(struct bnx2 *bp)
 	REG_WR_IND(bp, bp->shmem_base + BNX2_LINK_STATUS, fw_link_status);
 }
 
+static char *
+bnx2_xceiver_str(struct bnx2 *bp)
+{
+	return ((bp->phy_port == PORT_FIBRE) ? "SerDes" :
+		((bp->phy_flags & PHY_SERDES_FLAG) ? "Remote Copper" :
+		 "Copper"));
+}
+
 static void
 bnx2_report_link(struct bnx2 *bp)
 {
 	if (bp->link_up) {
 		netif_carrier_on(bp->dev);
-		printk(KERN_INFO PFX "%s NIC Link is Up, ", bp->dev->name);
+		printk(KERN_INFO PFX "%s NIC %s Link is Up, ", bp->dev->name,
+		       bnx2_xceiver_str(bp));
 
 		printk("%d Mbps ", bp->line_speed);
 
@@ -633,7 +642,8 @@ bnx2_report_link(struct bnx2 *bp)
 	}
 	else {
 		netif_carrier_off(bp->dev);
-		printk(KERN_ERR PFX "%s NIC Link is Down\n", bp->dev->name);
+		printk(KERN_ERR PFX "%s NIC %s Link is Down\n", bp->dev->name,
+		       bnx2_xceiver_str(bp));
 	}
 
 	bnx2_report_fw_link(bp);
-- 
cgit v0.10.2


From b8a7ce7bedb2134acb731e08e588ad92087a40ff Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 7 Jul 2007 22:51:03 -0700
Subject: [BNX2]: Reduce spurious INTA interrupts.

Spurious interrupts are often encountered especially on systems
using the 8259 PIC mode.  This is because the I/O write to deassert
the interrupt is posted and won't get to the chip immediately.  As
a result, the IRQ may remain asserted after the IRQ handler exits,
causing spurious interrupts.

Add read back to flush the I/O write to deassert the IRQ immediately.
We also store the last_status_idx immediately in the IRQ handler to
help detect whether the interrupt is ours or not when the IRQ is
entered again before ->poll gets called.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 439f482..a806a8e 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -2547,6 +2547,7 @@ bnx2_interrupt(int irq, void *dev_instance)
 {
 	struct net_device *dev = dev_instance;
 	struct bnx2 *bp = netdev_priv(dev);
+	struct status_block *sblk = bp->status_blk;
 
 	/* When using INTx, it is possible for the interrupt to arrive
 	 * at the CPU before the status block posted prior to the
@@ -2554,7 +2555,7 @@ bnx2_interrupt(int irq, void *dev_instance)
 	 * When using MSI, the MSI message will always complete after
 	 * the status block write.
 	 */
-	if ((bp->status_blk->status_idx == bp->last_status_idx) &&
+	if ((sblk->status_idx == bp->last_status_idx) &&
 	    (REG_RD(bp, BNX2_PCICFG_MISC_STATUS) &
 	     BNX2_PCICFG_MISC_STATUS_INTA_VALUE))
 		return IRQ_NONE;
@@ -2563,11 +2564,19 @@ bnx2_interrupt(int irq, void *dev_instance)
 		BNX2_PCICFG_INT_ACK_CMD_USE_INT_HC_PARAM |
 		BNX2_PCICFG_INT_ACK_CMD_MASK_INT);
 
+	/* Read back to deassert IRQ immediately to avoid too many
+	 * spurious interrupts.
+	 */
+	REG_RD(bp, BNX2_PCICFG_INT_ACK_CMD);
+
 	/* Return here if interrupt is shared and is disabled. */
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	netif_rx_schedule(dev);
+	if (netif_rx_schedule_prep(dev)) {
+		bp->last_status_idx = sblk->status_idx;
+		__netif_rx_schedule(dev);
+	}
 
 	return IRQ_HANDLED;
 }
-- 
cgit v0.10.2


From df149d70e1f34ec4995c8a703dbde38071ff4a05 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 7 Jul 2007 22:51:36 -0700
Subject: [BNX2]: Enhance the heartbeat.

In addition to the periodic heartbeat, we're adding a heartbeat
request interrupt when the heartbeat is late.  This is needed during
netpoll where the timer is not available.  -rt kernels will also
benefit since the timer is not as accurate.

[ We discussed this patch last time and we decided that the -rt
  kernel problem alone did not justify this patch.  I think the
  netpoll problem makes this patch necessary. ]

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index a806a8e..e755104 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -1487,6 +1487,20 @@ bnx2_set_default_link(struct bnx2 *bp)
 }
 
 static void
+bnx2_send_heart_beat(struct bnx2 *bp)
+{
+	u32 msg;
+	u32 addr;
+
+	spin_lock(&bp->indirect_lock);
+	msg = (u32) (++bp->fw_drv_pulse_wr_seq & BNX2_DRV_PULSE_SEQ_MASK);
+	addr = bp->shmem_base + BNX2_DRV_PULSE_MB;
+	REG_WR(bp, BNX2_PCICFG_REG_WINDOW_ADDRESS, addr);
+	REG_WR(bp, BNX2_PCICFG_REG_WINDOW, msg);
+	spin_unlock(&bp->indirect_lock);
+}
+
+static void
 bnx2_remote_phy_event(struct bnx2 *bp)
 {
 	u32 msg;
@@ -1495,6 +1509,11 @@ bnx2_remote_phy_event(struct bnx2 *bp)
 
 	msg = REG_RD_IND(bp, bp->shmem_base + BNX2_LINK_STATUS);
 
+	if (msg & BNX2_LINK_STATUS_HEART_BEAT_EXPIRED)
+		bnx2_send_heart_beat(bp);
+
+	msg &= ~BNX2_LINK_STATUS_HEART_BEAT_EXPIRED;
+
 	if ((msg & BNX2_LINK_STATUS_LINK_UP) == BNX2_LINK_STATUS_LINK_DOWN)
 		bp->link_up = 0;
 	else {
@@ -1572,6 +1591,7 @@ bnx2_set_remote_link(struct bnx2 *bp)
 			break;
 		case BNX2_FW_EVT_CODE_SW_TIMER_EXPIRATION_EVENT:
 		default:
+			bnx2_send_heart_beat(bp);
 			break;
 	}
 	return 0;
@@ -4122,7 +4142,7 @@ bnx2_init_chip(struct bnx2 *bp)
 	rc = bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT2 | BNX2_DRV_MSG_CODE_RESET,
 			  0);
 
-	REG_WR(bp, BNX2_MISC_ENABLE_SET_BITS, 0x5ffffff);
+	REG_WR(bp, BNX2_MISC_ENABLE_SET_BITS, BNX2_MISC_ENABLE_DEFAULT);
 	REG_RD(bp, BNX2_MISC_ENABLE_SET_BITS);
 
 	udelay(20);
@@ -4930,7 +4950,6 @@ static void
 bnx2_timer(unsigned long data)
 {
 	struct bnx2 *bp = (struct bnx2 *) data;
-	u32 msg;
 
 	if (!netif_running(bp->dev))
 		return;
@@ -4938,8 +4957,7 @@ bnx2_timer(unsigned long data)
 	if (atomic_read(&bp->intr_sem) != 0)
 		goto bnx2_restart_timer;
 
-	msg = (u32) ++bp->fw_drv_pulse_wr_seq;
-	REG_WR_IND(bp, bp->shmem_base + BNX2_DRV_PULSE_MB, msg);
+	bnx2_send_heart_beat(bp);
 
 	bp->stats_blk->stat_FwRxDrop = REG_RD_IND(bp, BNX2_FW_RX_DROP_COUNT);
 
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h
index 14c0a1e..6dca333 100644
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -6338,6 +6338,8 @@ struct l2_fhdr {
 
 #define RX_COPY_THRESH			92
 
+#define BNX2_MISC_ENABLE_DEFAULT	0x7ffffff
+
 #define DMA_READ_CHANS	5
 #define DMA_WRITE_CHANS	3
 
@@ -6839,6 +6841,7 @@ struct fw_info {
 #define BNX2_LINK_STATUS_SERDES_LINK		 (1<<20)
 #define BNX2_LINK_STATUS_PARTNER_AD_2500FULL	 (1<<21)
 #define BNX2_LINK_STATUS_PARTNER_AD_2500HALF	 (1<<22)
+#define BNX2_LINK_STATUS_HEART_BEAT_EXPIRED	 (1<<31)
 
 #define BNX2_DRV_PULSE_MB			0x00000010
 #define BNX2_DRV_PULSE_SEQ_MASK			 0x00007fff
-- 
cgit v0.10.2


From 58fc2ea4057511f84deaa182b1a3612327ecbf2d Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 7 Jul 2007 22:52:02 -0700
Subject: [BNX2]: Print management firmware version.

Add management firmware version for ethtool -i.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index e755104..406b2e1 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -5546,11 +5546,7 @@ bnx2_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 	strcpy(info->driver, DRV_MODULE_NAME);
 	strcpy(info->version, DRV_MODULE_VERSION);
 	strcpy(info->bus_info, pci_name(bp->pdev));
-	info->fw_version[0] = ((bp->fw_ver & 0xff000000) >> 24) + '0';
-	info->fw_version[2] = ((bp->fw_ver & 0xff0000) >> 16) + '0';
-	info->fw_version[4] = ((bp->fw_ver & 0xff00) >> 8) + '0';
-	info->fw_version[1] = info->fw_version[3] = '.';
-	info->fw_version[5] = 0;
+	strcpy(info->fw_version, bp->fw_version);
 }
 
 #define BNX2_REGDUMP_LEN		(32 * 1024)
@@ -6462,7 +6458,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 {
 	struct bnx2 *bp;
 	unsigned long mem_len;
-	int rc;
+	int rc, i, j;
 	u32 reg;
 	u64 dma_mask, persist_dma_mask;
 
@@ -6619,7 +6615,35 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 		goto err_out_unmap;
 	}
 
-	bp->fw_ver = REG_RD_IND(bp, bp->shmem_base + BNX2_DEV_INFO_BC_REV);
+	reg = REG_RD_IND(bp, bp->shmem_base + BNX2_DEV_INFO_BC_REV);
+	for (i = 0, j = 0; i < 3; i++) {
+		u8 num, k, skip0;
+
+		num = (u8) (reg >> (24 - (i * 8)));
+		for (k = 100, skip0 = 1; k >= 1; num %= k, k /= 10) {
+			if (num >= k || !skip0 || k == 1) {
+				bp->fw_version[j++] = (num / k) + '0';
+				skip0 = 0;
+			}
+		}
+		if (i != 2)
+			bp->fw_version[j++] = '.';
+	}
+	reg = REG_RD_IND(bp, bp->shmem_base + BNX2_BC_STATE_CONDITION);
+	reg &= BNX2_CONDITION_MFW_RUN_MASK;
+	if (reg != BNX2_CONDITION_MFW_RUN_UNKNOWN &&
+	    reg != BNX2_CONDITION_MFW_RUN_NONE) {
+		int i;
+		u32 addr = REG_RD_IND(bp, bp->shmem_base + BNX2_MFW_VER_PTR);
+
+		bp->fw_version[j++] = ' ';
+		for (i = 0; i < 3; i++) {
+			reg = REG_RD_IND(bp, addr + i * 4);
+			reg = swab32(reg);
+			memcpy(&bp->fw_version[j], &reg, 4);
+			j += 4;
+		}
+	}
 
 	reg = REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_HW_CFG_MAC_UPPER);
 	bp->mac_addr[0] = (u8) (reg >> 8);
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h
index 6dca333..d8cd1af 100644
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -6660,7 +6660,7 @@ struct bnx2 {
 
 	u32			shmem_base;
 
-	u32			fw_ver;
+	char			fw_version[32];
 
 	int			pm_cap;
 	int			pcix_cap;
@@ -7036,6 +7036,8 @@ struct fw_info {
 #define BNX2_PORT_FEATURE_MBA_VLAN_TAG_MASK	 0xffff
 #define BNX2_PORT_FEATURE_MBA_VLAN_ENABLE	 0x10000
 
+#define BNX2_MFW_VER_PTR			0x00000014c
+
 #define BNX2_BC_STATE_RESET_TYPE		0x000001c0
 #define BNX2_BC_STATE_RESET_TYPE_SIG		 0x00005254
 #define BNX2_BC_STATE_RESET_TYPE_SIG_MASK	 0x0000ffff
@@ -7089,6 +7091,14 @@ struct fw_info {
 #define BNX2_BC_STATE_ERR_NO_RXP		 (BNX2_BC_STATE_SIGN | 0x0600)
 #define BNX2_BC_STATE_ERR_TOO_MANY_RBUF		 (BNX2_BC_STATE_SIGN | 0x0700)
 
+#define BNX2_BC_STATE_CONDITION			0x000001c8
+#define BNX2_CONDITION_MFW_RUN_UNKNOWN		 0x00000000
+#define BNX2_CONDITION_MFW_RUN_IPMI		 0x00002000
+#define BNX2_CONDITION_MFW_RUN_UMP		 0x00004000
+#define BNX2_CONDITION_MFW_RUN_NCSI		 0x00006000
+#define BNX2_CONDITION_MFW_RUN_NONE		 0x0000e000
+#define BNX2_CONDITION_MFW_RUN_MASK		 0x0000e000
+
 #define BNX2_BC_STATE_DEBUG_CMD			0x1dc
 #define BNX2_BC_STATE_BC_DBG_CMD_SIGNATURE	 0x42440000
 #define BNX2_BC_STATE_BC_DBG_CMD_SIGNATURE_MASK	 0xffff0000
-- 
cgit v0.10.2


From 3a334b34b62d94be0275e5c76fa153cf7fd42770 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 7 Jul 2007 22:52:37 -0700
Subject: [BNX2]: Update version to 1.6.2.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 406b2e1..9181402 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -54,8 +54,8 @@
 
 #define DRV_MODULE_NAME		"bnx2"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"1.5.11"
-#define DRV_MODULE_RELDATE	"June 4, 2007"
+#define DRV_MODULE_VERSION	"1.6.2"
+#define DRV_MODULE_RELDATE	"July 6, 2007"
 
 #define RUN_AT(x) (jiffies + (x))
 
-- 
cgit v0.10.2


From 963bd949b12158d9b5380b718b31c4b33372ed73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 7 Jul 2007 22:54:56 -0700
Subject: [BNX2]: Seems to not need net/tcp.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Got bored of always recompiling it for no reason.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 9181402..d681903 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -40,7 +40,6 @@
 #define BCM_VLAN 1
 #endif
 #include <net/ip.h>
-#include <net/tcp.h>
 #include <net/checksum.h>
 #include <linux/workqueue.h>
 #include <linux/crc32.h>
-- 
cgit v0.10.2


From 1722933323b70f44b0548131604f1f3454c2aa8f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sat, 7 Jul 2007 22:59:14 -0700
Subject: [NET]: netdevice locking assumptions documentation

Update the documentation about locking assumptions.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index ce1361f..a6cab67 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -20,7 +20,6 @@ private data which gets freed when the network device is freed. If
 separately allocated data is attached to the network device
 (dev->priv) then it is up to the module exit handler to free that.
 
-
 struct net_device synchronization rules
 =======================================
 dev->open:
@@ -43,16 +42,17 @@ dev->get_stats:
 
 dev->hard_start_xmit:
 	Synchronization: netif_tx_lock spinlock.
+
 	When the driver sets NETIF_F_LLTX in dev->features this will be
 	called without holding netif_tx_lock. In this case the driver
 	has to lock by itself when needed. It is recommended to use a try lock
-	for this and return -1 when the spin lock fails. 
+	for this and return NETDEV_TX_LOCKED when the spin lock fails.
 	The locking there should also properly protect against 
-	set_multicast_list
-	Context: Process with BHs disabled or BH (timer).
-	Notes: netif_queue_stopped() is guaranteed false
-               Interrupts must be enabled when calling hard_start_xmit.
-                (Interrupts must also be enabled when enabling the BH handler.)
+	set_multicast_list.
+
+	Context: Process with BHs disabled or BH (timer),
+	         will be called with interrupts disabled by netconsole.
+
 	Return codes: 
 	o NETDEV_TX_OK everything ok. 
 	o NETDEV_TX_BUSY Cannot transmit packet, try later 
@@ -74,4 +74,5 @@ dev->poll:
 	Synchronization: __LINK_STATE_RX_SCHED bit in dev->state.  See
 		dev_close code and comments in net/core/dev.c for more info.
 	Context: softirq
+	         will be called with interrupts disabled by netconsole.
 
-- 
cgit v0.10.2


From 1c8c7d64169dc4b1ae3d8cd1bf35ea0a099b50ad Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sat, 7 Jul 2007 23:03:44 -0700
Subject: [NET]: netdevice mtu assumptions documentation

Document the expectations about device MTU handling.
The documentation about oversize packet handling is probably too
loose.

IMHO devices should drop oversize packets for robustness,
but many devices allow it now. For example, if you set mtu to 1200
bytes, most ether devices will allow a 1500 byte frame in.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index a6cab67..3786929 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -20,6 +20,31 @@ private data which gets freed when the network device is freed. If
 separately allocated data is attached to the network device
 (dev->priv) then it is up to the module exit handler to free that.
 
+MTU
+===
+Each network device has a Maximum Transfer Unit. The MTU does not
+include any link layer protocol overhead. Upper layer protocols must
+not pass a socket buffer (skb) to a device to transmit with more data
+than the mtu. The MTU does not include link layer header overhead, so
+for example on Ethernet if the standard MTU is 1500 bytes used, the
+actual skb will contain up to 1514 bytes because of the Ethernet
+header. Devices should allow for the 4 byte VLAN header as well.
+
+Segmentation Offload (GSO, TSO) is an exception to this rule.  The
+upper layer protocol may pass a large socket buffer to the device
+transmit routine, and the device will break that up into separate
+packets based on the current MTU.
+
+MTU is symmetrical and applies both to receive and transmit. A device
+must be able to receive at least the maximum size packet allowed by
+the MTU. A network device may use the MTU as mechanism to size receive
+buffers, but the device should allow packets with VLAN header. With
+standard Ethernet mtu of 1500 bytes, the device should allow up to
+1518 byte packets (1500 + 14 header + 4 tag).  The device may either:
+drop, truncate, or pass up oversize packets, but dropping oversize
+packets is preferred.
+
+
 struct net_device synchronization rules
 =======================================
 dev->open:
-- 
cgit v0.10.2


From 9af97186fcc9a1d9bbf195eb4bc2399d0dd66223 Mon Sep 17 00:00:00 2001
From: Pavel Emelianov <xemul@openvz.org>
Date: Mon, 9 Jul 2007 13:12:24 -0700
Subject: [ATM] br2684: Use seq_list_xxx helpers

The .show callback receives the list_head pointer now, not the struct
br2684_dev one.

Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 0e9f00c..3e26438 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -699,28 +699,13 @@ static struct atm_ioctl br2684_ioctl_ops = {
 #ifdef CONFIG_PROC_FS
 static void *br2684_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	loff_t offs = 0;
-	struct br2684_dev *brd;
-
 	read_lock(&devs_lock);
-
-	list_for_each_entry(brd, &br2684_devs, br2684_devs) {
-		if (offs == *pos)
-			return brd;
-		++offs;
-	}
-	return NULL;
+	return seq_list_start(&br2684_devs, *pos);
 }
 
 static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct br2684_dev *brd = v;
-
-	++*pos;
-
-	brd = list_entry(brd->br2684_devs.next,
-			 struct br2684_dev, br2684_devs);
-	return (&brd->br2684_devs != &br2684_devs) ? brd : NULL;
+	return seq_list_next(v, &br2684_devs, pos);
 }
 
 static void br2684_seq_stop(struct seq_file *seq, void *v)
@@ -730,7 +715,8 @@ static void br2684_seq_stop(struct seq_file *seq, void *v)
 
 static int br2684_seq_show(struct seq_file *seq, void *v)
 {
-	const struct br2684_dev *brdev = v;
+	const struct br2684_dev *brdev = list_entry(v, struct br2684_dev,
+			br2684_devs);
 	const struct net_device *net_dev = brdev->net_dev;
 	const struct br2684_vcc *brvcc;
 
-- 
cgit v0.10.2


From 60f0438a87cfd9f5faa439ca419497cd64e4c59e Mon Sep 17 00:00:00 2001
From: Pavel Emelianov <xemul@openvz.org>
Date: Mon, 9 Jul 2007 13:15:14 -0700
Subject: [NET]: Make some network-related proc files use seq_list_xxx helpers

This includes the /proc/net/protocols, /proc/net/rxrpc_calls and
/proc/net/rxrpc_connections files.

All three need seq_list_start_head to show a header line.

Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/sock.c b/net/core/sock.c
index c14ce01..252d21a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1851,46 +1851,15 @@ void proto_unregister(struct proto *prot)
 EXPORT_SYMBOL(proto_unregister);
 
 #ifdef CONFIG_PROC_FS
-static inline struct proto *__proto_head(void)
-{
-	return list_entry(proto_list.next, struct proto, node);
-}
-
-static inline struct proto *proto_head(void)
-{
-	return list_empty(&proto_list) ? NULL : __proto_head();
-}
-
-static inline struct proto *proto_next(struct proto *proto)
-{
-	return proto->node.next == &proto_list ? NULL :
-		list_entry(proto->node.next, struct proto, node);
-}
-
-static inline struct proto *proto_get_idx(loff_t pos)
-{
-	struct proto *proto;
-	loff_t i = 0;
-
-	list_for_each_entry(proto, &proto_list, node)
-		if (i++ == pos)
-			goto out;
-
-	proto = NULL;
-out:
-	return proto;
-}
-
 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	read_lock(&proto_list_lock);
-	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
+	return seq_list_start_head(&proto_list, *pos);
 }
 
 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	++*pos;
-	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
+	return seq_list_next(v, &proto_list, pos);
 }
 
 static void proto_seq_stop(struct seq_file *seq, void *v)
@@ -1938,7 +1907,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 
 static int proto_seq_show(struct seq_file *seq, void *v)
 {
-	if (v == SEQ_START_TOKEN)
+	if (v == &proto_list)
 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
 			   "protocol",
 			   "size",
@@ -1950,7 +1919,7 @@ static int proto_seq_show(struct seq_file *seq, void *v)
 			   "module",
 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
 	else
-		proto_seq_printf(seq, v);
+		proto_seq_printf(seq, list_entry(v, struct proto, node));
 	return 0;
 }
 
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
index 1c0be0e..77cc5fb 100644
--- a/net/rxrpc/ar-proc.c
+++ b/net/rxrpc/ar-proc.c
@@ -30,31 +30,13 @@ static const char *rxrpc_conn_states[] = {
  */
 static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
 {
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
 	read_lock(&rxrpc_call_lock);
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	list_for_each(_p, &rxrpc_calls)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_calls ? _p : NULL;
+	return seq_list_start_head(&rxrpc_calls, *_pos);
 }
 
 static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
-
-	return _p != &rxrpc_calls ? _p : NULL;
+	return seq_list_next(v, &rxrpc_calls, pos);
 }
 
 static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
@@ -68,7 +50,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 	struct rxrpc_call *call;
 	char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
 
-	if (v == SEQ_START_TOKEN) {
+	if (v == &rxrpc_calls) {
 		seq_puts(seq,
 			 "Proto Local                  Remote                "
 			 " SvID ConnID   CallID   End Use State    Abort   "
@@ -129,32 +111,14 @@ struct file_operations rxrpc_call_seq_fops = {
  */
 static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
 {
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
 	read_lock(&rxrpc_connection_lock);
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	list_for_each(_p, &rxrpc_connections)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_connections ? _p : NULL;
+	return seq_list_start_head(&rxrpc_connections, *_pos);
 }
 
 static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
 				       loff_t *pos)
 {
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_connections.next : _p->next;
-
-	return _p != &rxrpc_connections ? _p : NULL;
+	return seq_list_next(v, &rxrpc_connections, pos);
 }
 
 static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
@@ -168,7 +132,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 	struct rxrpc_transport *trans;
 	char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
 
-	if (v == SEQ_START_TOKEN) {
+	if (v == &rxrpc_connections) {
 		seq_puts(seq,
 			 "Proto Local                  Remote                "
 			 " SvID ConnID   Calls    End Use State    Key     "
-- 
cgit v0.10.2


From 6f11df8355e8f59f7572bf6ac1f63d692483b0c6 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 9 Jul 2007 13:16:00 -0700
Subject: [NET]: "wrong timeout value in sk_wait_data()": cleanups

- save 4 bytes

- it's read-mostly.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Vasily Averin <vvs@sw.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/sock.c b/net/core/sock.c
index 252d21a..091032a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -210,7 +210,8 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 		return -EDOM;
 
 	if (tv.tv_sec < 0) {
-		static int warned = 0;
+		static int warned __read_mostly;
+
 		*timeo_p = 0;
 		if (warned < 10 && net_ratelimit())
 			warned++;
-- 
cgit v0.10.2


From aa4291108f434a183207e645379414270118dccb Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Date: Mon, 9 Jul 2007 13:18:12 -0700
Subject: [IRDA]: use mutex instead of semaphore in VLSI 82C147 IrDA controller
 driver

The VLSI 82C147 IrDA controller driver uses a semaphore as a mutex.  Use the
mutex API instead of the (binary) semaphore.

Signed-off-by: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index bf78ef1..0538ca9 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -44,6 +44,7 @@ MODULE_LICENSE("GPL");
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
 
@@ -1660,8 +1661,8 @@ vlsi_irda_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	idev = ndev->priv;
 
 	spin_lock_init(&idev->lock);
-	init_MUTEX(&idev->sem);
-	down(&idev->sem);
+	mutex_init(&idev->mtx);
+	mutex_lock(&idev->mtx);
 	idev->pdev = pdev;
 
 	if (vlsi_irda_init(ndev) < 0)
@@ -1689,12 +1690,12 @@ vlsi_irda_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	IRDA_MESSAGE("%s: registered device %s\n", drivername, ndev->name);
 
 	pci_set_drvdata(pdev, ndev);
-	up(&idev->sem);
+	mutex_unlock(&idev->mtx);
 
 	return 0;
 
 out_freedev:
-	up(&idev->sem);
+	mutex_unlock(&idev->mtx);
 	free_netdev(ndev);
 out_disable:
 	pci_disable_device(pdev);
@@ -1716,12 +1717,12 @@ static void __devexit vlsi_irda_remove(struct pci_dev *pdev)
 	unregister_netdev(ndev);
 
 	idev = ndev->priv;
-	down(&idev->sem);
+	mutex_lock(&idev->mtx);
 	if (idev->proc_entry) {
 		remove_proc_entry(ndev->name, vlsi_proc_root);
 		idev->proc_entry = NULL;
 	}
-	up(&idev->sem);
+	mutex_unlock(&idev->mtx);
 
 	free_netdev(ndev);
 
@@ -1751,7 +1752,7 @@ static int vlsi_irda_suspend(struct pci_dev *pdev, pm_message_t state)
 		return 0;
 	}
 	idev = ndev->priv;	
-	down(&idev->sem);
+	mutex_lock(&idev->mtx);
 	if (pdev->current_state != 0) {			/* already suspended */
 		if (state.event > pdev->current_state) {	/* simply go deeper */
 			pci_set_power_state(pdev, pci_choose_state(pdev, state));
@@ -1759,7 +1760,7 @@ static int vlsi_irda_suspend(struct pci_dev *pdev, pm_message_t state)
 		}
 		else
 			IRDA_ERROR("%s - %s: invalid suspend request %u -> %u\n", __FUNCTION__, pci_name(pdev), pdev->current_state, state.event);
-		up(&idev->sem);
+		mutex_unlock(&idev->mtx);
 		return 0;
 	}
 
@@ -1775,7 +1776,7 @@ static int vlsi_irda_suspend(struct pci_dev *pdev, pm_message_t state)
 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
 	pdev->current_state = state.event;
 	idev->resume_ok = 1;
-	up(&idev->sem);
+	mutex_unlock(&idev->mtx);
 	return 0;
 }
 
@@ -1790,9 +1791,9 @@ static int vlsi_irda_resume(struct pci_dev *pdev)
 		return 0;
 	}
 	idev = ndev->priv;	
-	down(&idev->sem);
+	mutex_lock(&idev->mtx);
 	if (pdev->current_state == 0) {
-		up(&idev->sem);
+		mutex_unlock(&idev->mtx);
 		IRDA_WARNING("%s - %s: already resumed\n",
 			     __FUNCTION__, pci_name(pdev));
 		return 0;
@@ -1814,7 +1815,7 @@ static int vlsi_irda_resume(struct pci_dev *pdev)
 		 * device and independently resume_ok should catch any garbage config.
 		 */
 		IRDA_WARNING("%s - hm, nothing to resume?\n", __FUNCTION__);
-		up(&idev->sem);
+		mutex_unlock(&idev->mtx);
 		return 0;
 	}
 
@@ -1824,7 +1825,7 @@ static int vlsi_irda_resume(struct pci_dev *pdev)
 		netif_device_attach(ndev);
 	}
 	idev->resume_ok = 0;
-	up(&idev->sem);
+	mutex_unlock(&idev->mtx);
 	return 0;
 }
 
diff --git a/drivers/net/irda/vlsi_ir.h b/drivers/net/irda/vlsi_ir.h
index 2d3b773..ca12a60 100644
--- a/drivers/net/irda/vlsi_ir.h
+++ b/drivers/net/irda/vlsi_ir.h
@@ -728,7 +728,7 @@ typedef struct vlsi_irda_dev {
 	struct timeval		last_rx;
 
 	spinlock_t		lock;
-	struct semaphore	sem;
+	struct mutex		mtx;
 
 	u8			resume_ok;	
 	struct proc_dir_entry	*proc_entry;
-- 
cgit v0.10.2


From 4fda25a2cd7a18e0ef9f29ba3dd6f6cd9b7ca43f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 9 Jul 2007 13:18:57 -0700
Subject: [DCCP]: Make struct dccp_li_cachep static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 01c1edb..515225f 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -27,7 +27,7 @@ struct dccp_li_hist_entry {
 	u32		 dccplih_interval;
 };
 
-struct kmem_cache *dccp_li_cachep __read_mostly;
+static struct kmem_cache *dccp_li_cachep __read_mostly;
 
 static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio)
 {
-- 
cgit v0.10.2


From 0236e667e188af0336cd776e5b54c1f3fd19a03c Mon Sep 17 00:00:00 2001
From: Dan Aloni <da-x@monatomic.org>
Date: Mon, 9 Jul 2007 13:20:12 -0700
Subject: [NETFILTER] net/ipv4/netfilter/ip_tables.c: lower printk severity

Signed-off-by: Dan Aloni <da-x@monatomic.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2ba5bd9..e1b402c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2331,7 +2331,7 @@ static int __init ip_tables_init(void)
 	if (ret < 0)
 		goto err5;
 
-	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
+	printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
 
 err5:
-- 
cgit v0.10.2


From 5f1de3ec661e7b08348f565b7ca17586e7e94fc5 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Mon, 9 Jul 2007 13:20:54 -0700
Subject: [RXRPC]: Remove Makefile reference to obsolete RXRPC config variable

Since there is no Kconfig variable RXRPC anywhere in the tree, and the
variable AF_RXRPC performs exactly the same function, remove the
reference to CONFIG_RXRPC from net/Makefile.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/Makefile b/net/Makefile
index 34e5b2d..a87a889 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -37,7 +37,6 @@ obj-$(CONFIG_AX25)		+= ax25/
 obj-$(CONFIG_IRDA)		+= irda/
 obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
-obj-$(CONFIG_RXRPC)		+= rxrpc/
 obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_DECNET)		+= decnet/
-- 
cgit v0.10.2


From 1498b3f1952ae539a7d5c356acf942d5f4c1aece Mon Sep 17 00:00:00 2001
From: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Date: Mon, 9 Jul 2007 15:22:23 -0700
Subject: [NETPOLL]: Fix a leak-n-bug in netpoll_cleanup()

93ec2c723e3f8a216dde2899aeb85c648672bc6b applied excessive duct tape to
the netpoll beast's netpoll_cleanup(), thus substituting one leak with
another, and opening up a little buglet :-)

net_device->npinfo (netpoll_info) is a shared and refcounted object and
cannot simply be set NULL the first time netpoll_cleanup() is called.
Otherwise, further netpoll_cleanup()'s see np->dev->npinfo == NULL and
become no-ops, thus leaking. And it's a bug too: the first call to
netpoll_cleanup() would thus (annoyingly) "disable" other (still alive)
netpolls too. Maybe nobody noticed this because netconsole (only user
of netpoll) never supported multiple netpoll objects earlier.

This is a trivial and obvious one-line fixlet.

Signed-off-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4b06d19..de1b26a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -783,7 +783,6 @@ void netpoll_cleanup(struct netpoll *np)
 				spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 			}
 
-			np->dev->npinfo = NULL;
 			if (atomic_dec_and_test(&npinfo->refcnt)) {
 				skb_queue_purge(&npinfo->arp_tx);
 				skb_queue_purge(&npinfo->txq);
@@ -796,6 +795,7 @@ void netpoll_cleanup(struct netpoll *np)
 					kfree_skb(skb);
 				}
 				kfree(npinfo);
+				np->dev->npinfo = NULL;
 			}
 		}
 
-- 
cgit v0.10.2


From 6b25d30bf112370a12d05c3c0fd43732985dab01 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 9 Jul 2007 15:30:19 -0700
Subject: [NET]: Fix gen_estimator timer removal race

As noticed by Jarek Poplawski <jarkao2@o2.pl>, the timer removal in
gen_kill_estimator races with the timer function rearming the timer.

Check whether the timer's estimator list is empty before rearming the
timer in the timer function to fix this.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jarek Poplawski <jarkao2@o2.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 17daf4c..cc84d8d 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -128,7 +128,8 @@ static void est_timer(unsigned long arg)
 		spin_unlock(e->stats_lock);
 	}
 
-	mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+	if (elist[idx].list != NULL)
+		mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
 	read_unlock(&est_lock);
 }
 
-- 
cgit v0.10.2


From 4839c52b01ca91be1c62761e08fb3deb3881e857 Mon Sep 17 00:00:00 2001
From: Philippe De Muyter <phdm@macqel.be>
Date: Mon, 9 Jul 2007 15:32:57 -0700
Subject: [IPV4]: Make ip_tos2prio const.

Signed-off-by: Philippe De Muyter <phdm@macqel.be>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/route.h b/include/net/route.h
index 188b893..f7ce625 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -135,7 +135,7 @@ static inline void ip_rt_put(struct rtable * rt)
 
 #define IPTOS_RT_MASK	(IPTOS_TOS_MASK & ~3)
 
-extern __u8 ip_tos2prio[16];
+extern const __u8 ip_tos2prio[16];
 
 static inline char rt_tos2priority(u8 tos)
 {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8528502..88fa648 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -167,7 +167,7 @@ static struct dst_ops ipv4_dst_ops = {
 
 #define ECN_OR_COST(class)	TC_PRIO_##class
 
-__u8 ip_tos2prio[16] = {
+const __u8 ip_tos2prio[16] = {
 	TC_PRIO_BESTEFFORT,
 	ECN_OR_COST(FILLER),
 	TC_PRIO_BESTEFFORT,
-- 
cgit v0.10.2


From cfbba49d80be6cf8d3872b66fc5421f119843b36 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 9 Jul 2007 15:33:40 -0700
Subject: [NET]: Avoid copying writable clones in tunnel drivers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6328293..5c14ed6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -809,7 +809,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
 
-	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
+	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb) {
 			ip_rt_put(rt);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ebd2f2d..3964372 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -595,7 +595,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 
-	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb) {
 			ip_rt_put(rt);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a0902fb..281aee4 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -883,8 +883,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	 */
 	max_headroom += LL_RESERVED_SPACE(tdev);
 
-	if (skb_headroom(skb) < max_headroom ||
-	    skb_cloned(skb) || skb_shared(skb)) {
+	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 		struct sk_buff *new_skb;
 
 		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1efa95a..eb20bb6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -532,7 +532,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
 
-	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb) {
 			ip_rt_put(rt);
-- 
cgit v0.10.2


From 5b7f990927fe87ad3bec762a33c0e72bcbf6841e Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 11 Jul 2007 09:51:55 +0200
Subject: [Bluetooth] Add basics to better support and handle eSCO links

To better support and handle eSCO links in the future, a bunch of
constants need to be added and some basic routines need to be
updated. This is the initial step.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 93ce272..ebfb96b 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -107,14 +107,14 @@ enum {
 #define HCI_IDLE_TIMEOUT	(6000)	/* 6 seconds */
 #define HCI_INIT_TIMEOUT	(10000)	/* 10 seconds */
 
-/* HCI Packet types */
+/* HCI data types */
 #define HCI_COMMAND_PKT		0x01
 #define HCI_ACLDATA_PKT		0x02
 #define HCI_SCODATA_PKT		0x03
 #define HCI_EVENT_PKT		0x04
 #define HCI_VENDOR_PKT		0xff
 
-/* HCI Packet types */
+/* HCI packet types */
 #define HCI_DM1		0x0008
 #define HCI_DM3		0x0400
 #define HCI_DM5		0x4000
@@ -129,6 +129,14 @@ enum {
 #define SCO_PTYPE_MASK	(HCI_HV1 | HCI_HV2 | HCI_HV3)
 #define ACL_PTYPE_MASK	(~SCO_PTYPE_MASK)
 
+/* eSCO packet types */
+#define ESCO_HV1	0x0001
+#define ESCO_HV2	0x0002
+#define ESCO_HV3	0x0004
+#define ESCO_EV3	0x0008
+#define ESCO_EV4	0x0010
+#define ESCO_EV5	0x0020
+
 /* ACL flags */
 #define ACL_CONT		0x01
 #define ACL_START		0x02
@@ -138,6 +146,7 @@ enum {
 /* Baseband links */
 #define SCO_LINK	0x00
 #define ACL_LINK	0x01
+#define ESCO_LINK	0x02
 
 /* LMP features */
 #define LMP_3SLOT	0x01
@@ -162,6 +171,11 @@ enum {
 #define LMP_PSCHEME	0x02
 #define LMP_PCONTROL	0x04
 
+#define LMP_ESCO	0x80
+
+#define LMP_EV4		0x01
+#define LMP_EV5		0x02
+
 #define LMP_SNIFF_SUBR	0x02
 
 /* Connection modes */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 7c78744..8f67c8a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -78,6 +78,7 @@ struct hci_dev {
 	__u16		voice_setting;
 
 	__u16		pkt_type;
+	__u16		esco_type;
 	__u16		link_policy;
 	__u16		link_mode;
 
@@ -452,6 +453,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define lmp_encrypt_capable(dev)   ((dev)->features[0] & LMP_ENCRYPT)
 #define lmp_sniff_capable(dev)     ((dev)->features[0] & LMP_SNIFF)
 #define lmp_sniffsubr_capable(dev) ((dev)->features[5] & LMP_SNIFF_SUBR)
+#define lmp_esco_capable(dev)      ((dev)->features[3] & LMP_ESCO)
 
 /* ----- HCI protocols ----- */
 struct hci_proto {
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 63980bd..5fdfc9a6 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -123,8 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
 	conn->state = BT_CONNECT;
 	conn->out = 1;
 
-	cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
 	cp.handle   = cpu_to_le16(handle);
+	cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
 
 	hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp);
 }
@@ -220,19 +220,19 @@ int hci_conn_del(struct hci_conn *conn)
 
 	del_timer(&conn->disc_timer);
 
-	if (conn->type == SCO_LINK) {
-		struct hci_conn *acl = conn->link;
-		if (acl) {
-			acl->link = NULL;
-			hci_conn_put(acl);
-		}
-	} else {
+	if (conn->type == ACL_LINK) {
 		struct hci_conn *sco = conn->link;
 		if (sco)
 			sco->link = NULL;
 
 		/* Unacked frames */
 		hdev->acl_cnt += conn->sent;
+	} else {
+		struct hci_conn *acl = conn->link;
+		if (acl) {
+			acl->link = NULL;
+			hci_conn_put(acl);
+		}
 	}
 
 	tasklet_disable(&hdev->tx_task);
@@ -297,9 +297,10 @@ EXPORT_SYMBOL(hci_get_route);
 
 /* Create SCO or ACL connection.
  * Device _must_ be locked */
-struct hci_conn * hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst)
+struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst)
 {
 	struct hci_conn *acl;
+	struct hci_conn *sco;
 
 	BT_DBG("%s dst %s", hdev->name, batostr(dst));
 
@@ -313,28 +314,26 @@ struct hci_conn * hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst)
 	if (acl->state == BT_OPEN || acl->state == BT_CLOSED)
 		hci_acl_connect(acl);
 
-	if (type == SCO_LINK) {
-		struct hci_conn *sco;
+	if (type == ACL_LINK)
+		return acl;
 
-		if (!(sco = hci_conn_hash_lookup_ba(hdev, SCO_LINK, dst))) {
-			if (!(sco = hci_conn_add(hdev, SCO_LINK, dst))) {
-				hci_conn_put(acl);
-				return NULL;
-			}
+	if (!(sco = hci_conn_hash_lookup_ba(hdev, type, dst))) {
+		if (!(sco = hci_conn_add(hdev, type, dst))) {
+			hci_conn_put(acl);
+			return NULL;
 		}
-		acl->link = sco;
-		sco->link = acl;
+	}
 
-		hci_conn_hold(sco);
+	acl->link = sco;
+	sco->link = acl;
 
-		if (acl->state == BT_CONNECTED &&
-				(sco->state == BT_OPEN || sco->state == BT_CLOSED))
-			hci_add_sco(sco, acl->handle);
+	hci_conn_hold(sco);
 
-		return sco;
-	} else {
-		return acl;
-	}
+	if (acl->state == BT_CONNECTED &&
+			(sco->state == BT_OPEN || sco->state == BT_CLOSED))
+		hci_add_sco(sco, acl->handle);
+
+	return sco;
 }
 EXPORT_SYMBOL(hci_connect);
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 9c71cff..f6d867e 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -851,6 +851,7 @@ int hci_register_dev(struct hci_dev *hdev)
 
 	hdev->flags = 0;
 	hdev->pkt_type  = (HCI_DM1 | HCI_DH1 | HCI_HV1);
+	hdev->esco_type = (ESCO_HV1);
 	hdev->link_mode = (HCI_LM_ACCEPT);
 
 	hdev->idle_timeout = 0;
@@ -1254,7 +1255,7 @@ EXPORT_SYMBOL(hci_send_sco);
 static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int *quote)
 {
 	struct hci_conn_hash *h = &hdev->conn_hash;
-	struct hci_conn  *conn = NULL;
+	struct hci_conn *conn = NULL;
 	int num = 0, min = ~0;
 	struct list_head *p;
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 447ba71..4baea1e 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -350,11 +350,24 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s
 		if (hdev->features[0] & LMP_5SLOT)
 			hdev->pkt_type |= (HCI_DM5 | HCI_DH5);
 
-		if (hdev->features[1] & LMP_HV2)
-			hdev->pkt_type |= (HCI_HV2);
+		if (hdev->features[1] & LMP_HV2) {
+			hdev->pkt_type  |= (HCI_HV2);
+			hdev->esco_type |= (ESCO_HV2);
+		}
+
+		if (hdev->features[1] & LMP_HV3) {
+			hdev->pkt_type  |= (HCI_HV3);
+			hdev->esco_type |= (ESCO_HV3);
+		}
 
-		if (hdev->features[1] & LMP_HV3)
-			hdev->pkt_type |= (HCI_HV3);
+		if (hdev->features[3] & LMP_ESCO)
+			hdev->esco_type |= (ESCO_EV3);
+
+		if (hdev->features[4] & LMP_EV4)
+			hdev->esco_type |= (ESCO_EV4);
+
+		if (hdev->features[4] & LMP_EV5)
+			hdev->esco_type |= (ESCO_EV5);
 
 		BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name,
 				lf->features[0], lf->features[1], lf->features[2]);
@@ -881,12 +894,12 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s
 		if (conn) {
 			conn->sent -= count;
 
-			if (conn->type == SCO_LINK) {
-				if ((hdev->sco_cnt += count) > hdev->sco_pkts)
-					hdev->sco_cnt = hdev->sco_pkts;
-			} else {
+			if (conn->type == ACL_LINK) {
 				if ((hdev->acl_cnt += count) > hdev->acl_pkts)
 					hdev->acl_cnt = hdev->acl_pkts;
+			} else {
+				if ((hdev->sco_cnt += count) > hdev->sco_pkts)
+					hdev->sco_cnt = hdev->sco_pkts;
 			}
 		}
 	}
-- 
cgit v0.10.2


From c6c6e3e05c0b4349824efcdd36650e7be9d5c7c3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 10 Jul 2007 22:41:55 -0700
Subject: [NET]: Update comments for skb checksums

Rusty (whose comments we should all study and emulate :) pointed
out that our comments for skb checksums are no longer up-to-date.
So here is a patch to

1) add the case of partial checksums on input;
2) update partial checksum case to mention csum_start/csum_offset;
3) mention the new IPv6 feature bit.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 625d73b..9391e4a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -65,13 +65,20 @@
  *	    is able to produce some skb->csum, it MUST use COMPLETE,
  *	    not UNNECESSARY.
  *
+ *	PARTIAL: identical to the case for output below.  This may occur
+ *	    on a packet received directly from another Linux OS, e.g.,
+ *	    a virtualised Linux kernel on the same host.  The packet can
+ *	    be treated in the same way as UNNECESSARY except that on
+ *	    output (i.e., forwarding) the checksum must be filled in
+ *	    by the OS or the hardware.
+ *
  * B. Checksumming on output.
  *
  *	NONE: skb is checksummed by protocol or csum is not required.
  *
  *	PARTIAL: device is required to csum packet as seen by hard_start_xmit
- *	from skb->transport_header to the end and to record the checksum
- *	at skb->transport_header + skb->csum.
+ *	from skb->csum_start to the end and to record the checksum
+ *	at skb->csum_start + skb->csum_offset.
  *
  *	Device must show its capabilities in dev->features, set
  *	at device setup time.
@@ -82,6 +89,7 @@
  *			  TCP/UDP over IPv4. Sigh. Vendors like this
  *			  way by an unknown reason. Though, see comment above
  *			  about CHECKSUM_UNNECESSARY. 8)
+ *	NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead.
  *
  *	Any questions? No questions, good. 		--ANK
  */
-- 
cgit v0.10.2


From c9726d6890f7f3a892c879e067c3ed839f61e745 Mon Sep 17 00:00:00 2001
From: Ranjit Manomohan <ranjitm@google.com>
Date: Tue, 10 Jul 2007 22:43:16 -0700
Subject: [NET_SCHED]: Make HTB scheduler work with TSO.

Currently the HTB scheduler does not correctly account for TSO packets
which causes large inaccuracies in the bandwidth control when using TSO.
This patch allows the HTB scheduler to work with TSO enabled devices.

Signed-off-by: Ranjit Manomohan <ranjitm@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c031486..b417a95 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -129,15 +129,12 @@ struct htb_class {
 				/* of un.leaf originals should be done. */
 };
 
-/* TODO: maybe compute rate when size is too large .. or drop ? */
 static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
 			   int size)
 {
 	int slot = size >> rate->rate.cell_log;
-	if (slot > 255) {
-		cl->xstats.giants++;
-		slot = 255;
-	}
+	if (slot > 255)
+		return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]);
 	return rate->data[slot];
 }
 
@@ -606,13 +603,14 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		cl->qstats.drops++;
 		return NET_XMIT_DROP;
 	} else {
-		cl->bstats.packets++;
+		cl->bstats.packets +=
+			skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
 		cl->bstats.bytes += skb->len;
 		htb_activate(q, cl);
 	}
 
 	sch->q.qlen++;
-	sch->bstats.packets++;
+	sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
 	sch->bstats.bytes += skb->len;
 	return NET_XMIT_SUCCESS;
 }
@@ -661,8 +659,9 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
  * In such case we remove class from event queue first.
  */
 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
-			     int level, int bytes)
+			     int level, struct sk_buff *skb)
 {
+	int bytes = skb->len;
 	long toks, diff;
 	enum htb_cmode old_mode;
 
@@ -698,7 +697,8 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 		/* update byte stats except for leaves which are already updated */
 		if (cl->level) {
 			cl->bstats.bytes += bytes;
-			cl->bstats.packets++;
+			cl->bstats.packets += skb_is_gso(skb)?
+					skb_shinfo(skb)->gso_segs:1;
 		}
 		cl = cl->parent;
 	}
@@ -882,7 +882,7 @@ next:
 		   gives us slightly better performance */
 		if (!cl->un.leaf.q->q.qlen)
 			htb_deactivate(q, cl);
-		htb_charge_class(q, cl, level, skb->len);
+		htb_charge_class(q, cl, level, skb);
 	}
 	return skb;
 }
-- 
cgit v0.10.2


From c382bb9d32a55029fb13b118858e25908fab4617 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 10 Jul 2007 22:47:58 -0700
Subject: [IPV6]: Restore semantics of Routing Header processing.

The "fix" for the emerging security threat was overkill and it broke
the basic semantics of IPv6 routing header processing.  We should treat
RT0 (or even RT2, depending on configuration) as an "unknown" RH type
so that we
- silently ignore the routing header if segleft == 0
- send an ICMPv6 Parameter Problem message back to the sender,
  otherwise.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 173a4bb..fc3a961 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -372,22 +372,13 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 	struct rt0_hdr *rthdr;
 	int accept_source_route = ipv6_devconf.accept_source_route;
 
-	if (accept_source_route < 0 ||
-	    ((idev = in6_dev_get(skb->dev)) == NULL)) {
-		kfree_skb(skb);
-		return -1;
-	}
-	if (idev->cnf.accept_source_route < 0) {
+	idev = in6_dev_get(skb->dev);
+	if (idev) {
+		if (accept_source_route > idev->cnf.accept_source_route)
+			accept_source_route = idev->cnf.accept_source_route;
 		in6_dev_put(idev);
-		kfree_skb(skb);
-		return -1;
 	}
 
-	if (accept_source_route > idev->cnf.accept_source_route)
-		accept_source_route = idev->cnf.accept_source_route;
-
-	in6_dev_put(idev);
-
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
@@ -399,24 +390,6 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 
 	hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
 
-	switch (hdr->type) {
-#ifdef CONFIG_IPV6_MIP6
-	case IPV6_SRCRT_TYPE_2:
-		break;
-#endif
-	case IPV6_SRCRT_TYPE_0:
-		if (accept_source_route > 0)
-			break;
-		kfree_skb(skb);
-		return -1;
-	default:
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
-				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
-				  (&hdr->type) - skb_network_header(skb));
-		return -1;
-	}
-
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
 	    skb->pkt_type != PACKET_HOST) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
@@ -455,6 +428,8 @@ looped_back:
 
 	switch (hdr->type) {
 	case IPV6_SRCRT_TYPE_0:
+		if (accept_source_route <= 0)
+			goto unknown_rh;
 		if (hdr->hdrlen & 0x01) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
@@ -466,6 +441,8 @@ looped_back:
 		break;
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	case IPV6_SRCRT_TYPE_2:
+		if (accept_source_route < 0)
+			goto unknown_rh;
 		/* Silently discard invalid RTH type 2 */
 		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
@@ -475,6 +452,8 @@ looped_back:
 		}
 		break;
 #endif
+	default:
+		goto unknown_rh;
 	}
 
 	/*
@@ -578,6 +557,12 @@ looped_back:
 	skb_push(skb, skb->data - skb_network_header(skb));
 	dst_input(skb);
 	return -1;
+
+unknown_rh:
+	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
+	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+			  (&hdr->type) - skb_network_header(skb));
+	return -1;
 }
 
 static struct inet6_protocol rthdr_protocol = {
-- 
cgit v0.10.2


From bb4dbf9e61d0801927e7df2569bb3dd8287ea301 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 10 Jul 2007 22:55:49 -0700
Subject: [IPV6]: Do not send RH0 anymore.

Based on <draft-ietf-ipv6-deprecate-rh0-00.txt>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index af6a63a..09c184e 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -874,8 +874,7 @@ accept_redirects - BOOLEAN
 accept_source_route - INTEGER
 	Accept source routing (routing extension header).
 
-	> 0: Accept routing header.
-	= 0: Accept only routing header type 2.
+	>= 0: Accept only routing header type 2.
 	< 0: Do not accept routing header.
 
 	Default: 0
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 213b63be..cb3118c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -27,8 +27,8 @@ struct in6_ifreq {
 	int		ifr6_ifindex; 
 };
 
-#define IPV6_SRCRT_STRICT	0x01	/* this hop must be a neighbor	*/
-#define IPV6_SRCRT_TYPE_0	0	/* IPv6 type 0 Routing Header	*/
+#define IPV6_SRCRT_STRICT	0x01	/* Deprecated; will be removed */
+#define IPV6_SRCRT_TYPE_0	0	/* Deprecated; will be removed */
 #define IPV6_SRCRT_TYPE_2	2	/* IPv6 type 2 Routing Header	*/
 
 /*
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 78a0d06..46b9dce 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -512,10 +512,6 @@ extern int 			ipv6_ext_hdr(u8 nexthdr);
 
 extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type);
 
-extern struct ipv6_txoptions *	ipv6_invert_rthdr(struct sock *sk,
-						  struct ipv6_rt_hdr *hdr);
-
-
 /*
  *	socket options (ipv6_sockglue.c)
  */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 31737cd..b158c66 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -253,17 +253,6 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 	if (dst == NULL) {
 		opt = np->opt;
-		if (opt == NULL &&
-		    np->rxopt.bits.osrcrt == 2 &&
-		    ireq6->pktopts) {
-			struct sk_buff *pktopts = ireq6->pktopts;
-			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
-
-			if (rxopt->srcrt)
-				opt = ipv6_invert_rthdr(sk,
-			  (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
-						 rxopt->srcrt));
-		}
 
 		if (opt != NULL && opt->srcrt != NULL) {
 			const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
@@ -570,15 +559,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	if (sk_acceptq_is_full(sk))
 		goto out_overflow;
 
-	if (np->rxopt.bits.osrcrt == 2 && opt == NULL && ireq6->pktopts) {
-		const struct inet6_skb_parm *rxopt = IP6CB(ireq6->pktopts);
-
-		if (rxopt->srcrt)
-			opt = ipv6_invert_rthdr(sk,
-		   (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) +
-					  rxopt->srcrt));
-	}
-
 	if (dst == NULL) {
 		struct in6_addr *final_p = NULL, final;
 		struct flowi fl;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ba1386d..fe0f490 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -657,11 +657,10 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 			rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
 
 			switch (rthdr->type) {
-			case IPV6_SRCRT_TYPE_0:
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 			case IPV6_SRCRT_TYPE_2:
-#endif
 				break;
+#endif
 			default:
 				err = -EINVAL;
 				goto exit_f;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index fc3a961..c82d4d4 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -427,18 +427,6 @@ looped_back:
 	}
 
 	switch (hdr->type) {
-	case IPV6_SRCRT_TYPE_0:
-		if (accept_source_route <= 0)
-			goto unknown_rh;
-		if (hdr->hdrlen & 0x01) {
-			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
-					 IPSTATS_MIB_INHDRERRORS);
-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
-					  ((&hdr->hdrlen) -
-					   skb_network_header(skb)));
-			return -1;
-		}
-		break;
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	case IPV6_SRCRT_TYPE_2:
 		if (accept_source_route < 0)
@@ -576,72 +564,6 @@ void __init ipv6_rthdr_init(void)
 		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
 };
 
-/*
-   This function inverts received rthdr.
-   NOTE: specs allow to make it automatically only if
-   packet authenticated.
-
-   I will not discuss it here (though, I am really pissed off at
-   this stupid requirement making rthdr idea useless)
-
-   Actually, it creates severe problems  for us.
-   Embryonic requests has no associated sockets,
-   so that user have no control over it and
-   cannot not only to set reply options, but
-   even to know, that someone wants to connect
-   without success. :-(
-
-   For now we need to test the engine, so that I created
-   temporary (or permanent) backdoor.
-   If listening socket set IPV6_RTHDR to 2, then we invert header.
-						   --ANK (980729)
- */
-
-struct ipv6_txoptions *
-ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
-{
-	/* Received rthdr:
-
-	   [ H1 -> H2 -> ... H_prev ]  daddr=ME
-
-	   Inverted result:
-	   [ H_prev -> ... -> H1 ] daddr =sender
-
-	   Note, that IP output engine will rewrite this rthdr
-	   by rotating it left by one addr.
-	 */
-
-	int n, i;
-	struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr;
-	struct rt0_hdr *irthdr;
-	struct ipv6_txoptions *opt;
-	int hdrlen = ipv6_optlen(hdr);
-
-	if (hdr->segments_left ||
-	    hdr->type != IPV6_SRCRT_TYPE_0 ||
-	    hdr->hdrlen & 0x01)
-		return NULL;
-
-	n = hdr->hdrlen >> 1;
-	opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC);
-	if (opt == NULL)
-		return NULL;
-	memset(opt, 0, sizeof(*opt));
-	opt->tot_len = sizeof(*opt) + hdrlen;
-	opt->srcrt = (void*)(opt+1);
-	opt->opt_nflen = hdrlen;
-
-	memcpy(opt->srcrt, hdr, sizeof(*hdr));
-	irthdr = (struct rt0_hdr*)opt->srcrt;
-	irthdr->reserved = 0;
-	opt->srcrt->segments_left = n;
-	for (i=0; i<n; i++)
-		memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16);
-	return opt;
-}
-
-EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
-
 /**********************************
   Hop-by-hop options.
  **********************************/
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 1c35066..1841714 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -416,11 +416,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		if (optname == IPV6_RTHDR && opt && opt->srcrt) {
 			struct ipv6_rt_hdr *rthdr = opt->srcrt;
 			switch (rthdr->type) {
-			case IPV6_SRCRT_TYPE_0:
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 			case IPV6_SRCRT_TYPE_2:
-#endif
 				break;
+#endif
 			default:
 				goto sticky_done;
 			}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 193d9d6..d67fb1e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -484,17 +484,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 
 	if (dst == NULL) {
 		opt = np->opt;
-		if (opt == NULL &&
-		    np->rxopt.bits.osrcrt == 2 &&
-		    treq->pktopts) {
-			struct sk_buff *pktopts = treq->pktopts;
-			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
-			if (rxopt->srcrt)
-				opt = ipv6_invert_rthdr(sk,
-			  (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
-						 rxopt->srcrt));
-		}
-
 		if (opt && opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
 			ipv6_addr_copy(&final, &fl.fl6_dst);
@@ -1391,15 +1380,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	if (sk_acceptq_is_full(sk))
 		goto out_overflow;
 
-	if (np->rxopt.bits.osrcrt == 2 &&
-	    opt == NULL && treq->pktopts) {
-		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
-		if (rxopt->srcrt)
-			opt = ipv6_invert_rthdr(sk,
-		   (struct ipv6_rt_hdr *)(skb_network_header(treq->pktopts) +
-					  rxopt->srcrt));
-	}
-
 	if (dst == NULL) {
 		struct in6_addr *final_p = NULL, final;
 		struct flowi fl;
-- 
cgit v0.10.2


From 4c752098f529f41abfc985426a3eca0f2cb96676 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 May 2007 13:28:48 +0900
Subject: [IPV6]: Make IPV6_{RECV,2292}RTHDR boolean options.

Because reversing RH0 is no longer supported now that RH0 is
deprecated, let's make IPV6_{RECV,2292}RTHDR boolean options.
Booleans are more appropriate from the standard's point of view.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cb3118c..97983dc9 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -299,8 +299,8 @@ struct ipv6_pinfo {
 	/* pktoption flags */
 	union {
 		struct {
-			__u16	srcrt:2,
-				osrcrt:2,
+			__u16	srcrt:1,
+				osrcrt:1,
 			        rxinfo:1,
 			        rxoinfo:1,
 				rxhlim:1,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 1841714..d684639 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -336,16 +336,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case IPV6_RECVRTHDR:
-		if (val < 0 || val > 2)
-			goto e_inval;
-		np->rxopt.bits.srcrt = val;
+		np->rxopt.bits.srcrt = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_2292RTHDR:
-		if (val < 0 || val > 2)
-			goto e_inval;
-		np->rxopt.bits.osrcrt = val;
+		np->rxopt.bits.osrcrt = valbool;
 		retv = 0;
 		break;
 
-- 
cgit v0.10.2


From ed8b548ce3cb988f59a0fd9af6ccdc4f8198cd19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Tue, 10 Jul 2007 23:02:12 -0700
Subject: [DECNET]: Another unnecessary net/tcp.h inclusion in net/dn.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No longer needed.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/dn.h b/include/net/dn.h
index ac4ce90..6277783 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -3,7 +3,6 @@
 
 #include <linux/dn.h>
 #include <net/sock.h>
-#include <net/tcp.h>
 #include <asm/byteorder.h>
 
 #define dn_ntohs(x) le16_to_cpu(x)
-- 
cgit v0.10.2


From dffe4f048b420f1af0b10a6090add0c5ea69e585 Mon Sep 17 00:00:00 2001
From: Micah Gruber <micah.gruber@gmail.com>
Date: Tue, 10 Jul 2007 23:04:19 -0700
Subject: [IPV6]: Remove unneeded pointer idev from addrconf_cleanup().

This trivial patch removes the unneeded local pointer idev, which only
stored the result of __in6_dev_get() and was never used afterwards. The
NULL check can be done directly with if (__in6_dev_get(dev) == NULL).

Signed-off-by: Micah Gruber <micah.gruber@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 11c0028..95737ab 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4243,7 +4243,6 @@ errout:
 void __exit addrconf_cleanup(void)
 {
 	struct net_device *dev;
-	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa;
 	int i;
 
@@ -4261,7 +4260,7 @@ void __exit addrconf_cleanup(void)
 	 */
 
 	for_each_netdev(dev) {
-		if ((idev = __in6_dev_get(dev)) == NULL)
+		if (__in6_dev_get(dev) == NULL)
 			continue;
 		addrconf_ifdown(dev, 1);
 	}
-- 
cgit v0.10.2


From 3be550f34b03e5eb762f74d447ebbeba97efbd6d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 10 Jul 2007 23:06:43 -0700
Subject: [UDP]: Fix length check.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rémi Denis-Courmont wrote:
> Right. By the way, shouldn't "len" rather be signed in there?
>
> 		unsigned int len;
>
> 		/* if we're overly short, let UDP handle it */
> 		len = skb->len - sizeof(struct udphdr);
> 		if (len <= 0)
> 			goto udp;

It should, but the < 0 case can't happen since __udp4_lib_rcv
already makes sure that we have at least a complete UDP header.

Anyways, this patch fixes it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4ec4a25..2835535 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -951,14 +951,10 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		 * >0 if skb should be passed on to UDP.
 		 * <0 if skb should be resubmitted as proto -N
 		 */
-		unsigned int len;
 
 		/* if we're overly short, let UDP handle it */
-		len = skb->len - sizeof(struct udphdr);
-		if (len <= 0)
-			goto udp;
-
-		if (up->encap_rcv != NULL) {
+		if (skb->len > sizeof(struct udphdr) &&
+		    up->encap_rcv != NULL) {
 			int ret;
 
 			ret = (*up->encap_rcv)(sk, skb);
@@ -971,7 +967,6 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		/* FALLTHROUGH -- it's a UDP Packet */
 	}
 
-udp:
 	/*
 	 * 	UDP-Lite specific tests, ignored on UDP sockets
 	 */
-- 
cgit v0.10.2


From 56b3d975bbce65f655c5612b4822da671f9fd9b2 Mon Sep 17 00:00:00 2001
From: Philippe De Muyter <phdm@macqel.be>
Date: Tue, 10 Jul 2007 23:07:31 -0700
Subject: [NET]: Make all initialized struct seq_operations const.

Make all initialized struct seq_operations in net/ const

Signed-off-by: Philippe De Muyter <phdm@macqel.be>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/802/tr.c b/net/802/tr.c
index 0ba1946..e56e61a 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -567,7 +567,7 @@ static int rif_seq_show(struct seq_file *seq, void *v)
 }
 
 
-static struct seq_operations rif_seq_ops = {
+static const struct seq_operations rif_seq_ops = {
 	.start = rif_seq_start,
 	.next  = rif_seq_next,
 	.stop  = rif_seq_stop,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 8693b21..c0040c9 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -69,7 +69,7 @@ static const char name_conf[]	 = "config";
  *	Generic /proc/net/vlan/<file> file and inode operations
  */
 
-static struct seq_operations vlan_seq_ops = {
+static const struct seq_operations vlan_seq_ops = {
 	.start = vlan_seq_start,
 	.next = vlan_seq_next,
 	.stop = vlan_seq_stop,
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 5ef6a23..3d1655f 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -1024,7 +1024,7 @@ static int aarp_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations aarp_seq_ops = {
+static const struct seq_operations aarp_seq_ops = {
 	.start  = aarp_seq_start,
 	.next   = aarp_seq_next,
 	.stop   = aarp_seq_stop,
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 57ff812..87a582c 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -204,21 +204,21 @@ out:
 	return 0;
 }
 
-static struct seq_operations atalk_seq_interface_ops = {
+static const struct seq_operations atalk_seq_interface_ops = {
 	.start  = atalk_seq_interface_start,
 	.next   = atalk_seq_interface_next,
 	.stop   = atalk_seq_interface_stop,
 	.show   = atalk_seq_interface_show,
 };
 
-static struct seq_operations atalk_seq_route_ops = {
+static const struct seq_operations atalk_seq_route_ops = {
 	.start  = atalk_seq_route_start,
 	.next   = atalk_seq_route_next,
 	.stop   = atalk_seq_route_stop,
 	.show   = atalk_seq_route_show,
 };
 
-static struct seq_operations atalk_seq_socket_ops = {
+static const struct seq_operations atalk_seq_socket_ops = {
 	.start  = atalk_seq_socket_start,
 	.next   = atalk_seq_socket_next,
 	.stop   = atalk_seq_socket_stop,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 3e26438..faa6aaf 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -758,7 +758,7 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations br2684_seq_ops = {
+static const struct seq_operations br2684_seq_ops = {
 	.start = br2684_seq_start,
 	.next  = br2684_seq_next,
 	.stop  = br2684_seq_stop,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 876b77f..ecf0f79 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -928,7 +928,7 @@ static int clip_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations arp_seq_ops = {
+static const struct seq_operations arp_seq_ops = {
 	.start	= clip_seq_start,
 	.next	= neigh_seq_next,
 	.stop	= neigh_seq_stop,
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 4dc5f2b..2770fb4 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1174,7 +1174,7 @@ static int lec_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations lec_seq_ops = {
+static const struct seq_operations lec_seq_ops = {
 	.start = lec_seq_start,
 	.next = lec_seq_next,
 	.stop = lec_seq_stop,
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 4b05cbe..91f3ffc 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -177,7 +177,7 @@ static int mpc_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations mpc_op = {
+static const struct seq_operations mpc_op = {
 	.start =	mpc_start,
 	.next =		mpc_next,
 	.stop =		mpc_stop,
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 9e61e51..88154da 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -260,7 +260,7 @@ static int atm_dev_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations atm_dev_seq_ops = {
+static const struct seq_operations atm_dev_seq_ops = {
 	.start	= atm_dev_seq_start,
 	.next	= atm_dev_seq_next,
 	.stop	= atm_dev_seq_stop,
@@ -295,7 +295,7 @@ static int pvc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations pvc_seq_ops = {
+static const struct seq_operations pvc_seq_ops = {
 	.start	= vcc_seq_start,
 	.next	= vcc_seq_next,
 	.stop	= vcc_seq_stop,
@@ -329,7 +329,7 @@ static int vcc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations vcc_seq_ops = {
+static const struct seq_operations vcc_seq_ops = {
 	.start	= vcc_seq_start,
 	.next	= vcc_seq_next,
 	.stop	= vcc_seq_stop,
@@ -364,7 +364,7 @@ static int svc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations svc_seq_ops = {
+static const struct seq_operations svc_seq_ops = {
 	.start	= vcc_seq_start,
 	.next	= vcc_seq_next,
 	.stop	= vcc_seq_stop,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 429e13a..c83cf84 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1924,7 +1924,7 @@ static int ax25_info_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ax25_info_seqops = {
+static const struct seq_operations ax25_info_seqops = {
 	.start = ax25_info_start,
 	.next = ax25_info_next,
 	.stop = ax25_info_stop,
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index d65b8e2..9ecf6f1 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -320,7 +320,7 @@ static int ax25_rt_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ax25_rt_seqops = {
+static const struct seq_operations ax25_rt_seqops = {
 	.start = ax25_rt_seq_start,
 	.next = ax25_rt_seq_next,
 	.stop = ax25_rt_seq_stop,
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 75c7664..ce0b13d 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -185,7 +185,7 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ax25_uid_seqops = {
+static const struct seq_operations ax25_uid_seqops = {
 	.start = ax25_uid_seq_start,
 	.next = ax25_uid_seq_next,
 	.stop = ax25_uid_seq_stop,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index bfa910b..ed76d4a 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2304,7 +2304,7 @@ static int dn_socket_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations dn_socket_seq_ops = {
+static const struct seq_operations dn_socket_seq_ops = {
 	.start	= dn_socket_seq_start,
 	.next	= dn_socket_seq_next,
 	.stop	= dn_socket_seq_stop,
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index e31549e..fa6604f 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1416,7 +1416,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations dn_dev_seq_ops = {
+static const struct seq_operations dn_dev_seq_ops = {
 	.start	= dn_dev_seq_start,
 	.next	= dn_dev_seq_next,
 	.stop	= dn_dev_seq_stop,
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 4bf066c..174d8a7 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -569,7 +569,7 @@ static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos)
 			       NEIGH_SEQ_NEIGH_ONLY);
 }
 
-static struct seq_operations dn_neigh_seq_ops = {
+static const struct seq_operations dn_neigh_seq_ops = {
 	.start = dn_neigh_seq_start,
 	.next  = neigh_seq_next,
 	.stop  = neigh_seq_stop,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index a8bf106..82622fb 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1726,7 +1726,7 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations dn_rt_cache_seq_ops = {
+static const struct seq_operations dn_rt_cache_seq_ops = {
 	.start	= dn_rt_cache_seq_start,
 	.next	= dn_rt_cache_seq_next,
 	.stop	= dn_rt_cache_seq_stop,
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 15ad5dd..8d6901d 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -549,7 +549,7 @@ static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ip_vs_app_seq_ops = {
+static const struct seq_operations ip_vs_app_seq_ops = {
 	.start = ip_vs_app_seq_start,
 	.next  = ip_vs_app_seq_next,
 	.stop  = ip_vs_app_seq_stop,
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 7018f97..3b446b1 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -745,7 +745,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ip_vs_conn_seq_ops = {
+static const struct seq_operations ip_vs_conn_seq_ops = {
 	.start = ip_vs_conn_seq_start,
 	.next  = ip_vs_conn_seq_next,
 	.stop  = ip_vs_conn_seq_stop,
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 68fe1d4..e1052bc 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1783,7 +1783,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ip_vs_info_seq_ops = {
+static const struct seq_operations ip_vs_info_seq_ops = {
 	.start = ip_vs_info_seq_start,
 	.next  = ip_vs_info_seq_next,
 	.stop  = ip_vs_info_seq_stop,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 8bacda3..dcc12b1 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -639,7 +639,7 @@ static int clusterip_seq_show(struct seq_file *s, void *v)
 	return 0;
 }
 
-static struct seq_operations clusterip_seq_ops = {
+static const struct seq_operations clusterip_seq_ops = {
 	.start	= clusterip_seq_start,
 	.next	= clusterip_seq_next,
 	.stop	= clusterip_seq_stop,
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index a7b14f2..3218043 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -370,7 +370,7 @@ static int recent_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations recent_seq_ops = {
+static const struct seq_operations recent_seq_ops = {
 	.start		= recent_seq_start,
 	.next		= recent_seq_next,
 	.stop		= recent_seq_stop,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 434e084..3da9d73 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -164,7 +164,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	return 0;
 }
 
-static struct seq_operations ct_seq_ops = {
+static const struct seq_operations ct_seq_ops = {
 	.start = ct_seq_start,
 	.next  = ct_seq_next,
 	.stop  = ct_seq_stop,
@@ -282,7 +282,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
 	return seq_putc(s, '\n');
 }
 
-static struct seq_operations exp_seq_ops = {
+static const struct seq_operations exp_seq_ops = {
 	.start = exp_seq_start,
 	.next = exp_seq_next,
 	.stop = exp_seq_stop,
@@ -386,7 +386,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ct_cpu_seq_ops = {
+static const struct seq_operations ct_cpu_seq_ops = {
 	.start  = ct_cpu_seq_start,
 	.next   = ct_cpu_seq_next,
 	.stop   = ct_cpu_seq_stop,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 95737ab..24424c3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2785,7 +2785,7 @@ static int if6_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations if6_seq_ops = {
+static const struct seq_operations if6_seq_ops = {
 	.start	= if6_seq_start,
 	.next	= if6_seq_next,
 	.show	= if6_seq_show,
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 9b81264..b8c533f 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -539,7 +539,7 @@ static int ac6_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ac6_seq_ops = {
+static const struct seq_operations ac6_seq_ops = {
 	.start	=	ac6_seq_start,
 	.next	=	ac6_seq_next,
 	.stop	=	ac6_seq_stop,
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index c206a15..413a4eb 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -648,7 +648,7 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ip6fl_seq_ops = {
+static const struct seq_operations ip6fl_seq_ops = {
 	.start	=	ip6fl_seq_start,
 	.next	=	ip6fl_seq_next,
 	.stop	=	ip6fl_seq_stop,
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3e308fb..ae98818 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2423,7 +2423,7 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations igmp6_mc_seq_ops = {
+static const struct seq_operations igmp6_mc_seq_ops = {
 	.start	=	igmp6_mc_seq_start,
 	.next	=	igmp6_mc_seq_next,
 	.stop	=	igmp6_mc_seq_stop,
@@ -2597,7 +2597,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations igmp6_mcf_seq_ops = {
+static const struct seq_operations igmp6_mcf_seq_ops = {
 	.start	=	igmp6_mcf_seq_start,
 	.next	=	igmp6_mcf_seq_next,
 	.stop	=	igmp6_mcf_seq_stop,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index aac6aeb..e27383d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1280,7 +1280,7 @@ static int raw6_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations raw6_seq_ops = {
+static const struct seq_operations raw6_seq_ops = {
 	.start =	raw6_seq_start,
 	.next =		raw6_seq_next,
 	.stop =		raw6_seq_stop,
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index db32ac8..4226e71 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -286,21 +286,21 @@ out:
 	return 0;
 }
 
-static struct seq_operations ipx_seq_interface_ops = {
+static const struct seq_operations ipx_seq_interface_ops = {
 	.start  = ipx_seq_interface_start,
 	.next   = ipx_seq_interface_next,
 	.stop   = ipx_seq_interface_stop,
 	.show   = ipx_seq_interface_show,
 };
 
-static struct seq_operations ipx_seq_route_ops = {
+static const struct seq_operations ipx_seq_route_ops = {
 	.start  = ipx_seq_route_start,
 	.next   = ipx_seq_route_next,
 	.stop   = ipx_seq_route_stop,
 	.show   = ipx_seq_route_show,
 };
 
-static struct seq_operations ipx_seq_socket_ops = {
+static const struct seq_operations ipx_seq_socket_ops = {
 	.start  = ipx_seq_socket_start,
 	.next   = ipx_seq_socket_next,
 	.stop   = ipx_seq_interface_stop,
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index f097341..af0cea7 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -395,7 +395,7 @@ static int discovery_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations discovery_seq_ops = {
+static const struct seq_operations discovery_seq_ops = {
 	.start  = discovery_seq_start,
 	.next   = discovery_seq_next,
 	.stop   = discovery_seq_stop,
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 4749f8f..2d63fa8 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -562,7 +562,7 @@ static int ircomm_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ircomm_seq_ops = {
+static const struct seq_operations ircomm_seq_ops = {
 	.start  = ircomm_seq_start,
 	.next   = ircomm_seq_next,
 	.stop   = ircomm_seq_stop,
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 915d938..774eb70 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -1066,7 +1066,7 @@ static int irias_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations irias_seq_ops = {
+static const struct seq_operations irias_seq_ops = {
 	.start  = irias_seq_start,
 	.next   = irias_seq_next,
 	.stop   = irias_seq_stop,
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index ed69773..f5778ef 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -1217,7 +1217,7 @@ static int irlan_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations irlan_seq_ops = {
+static const struct seq_operations irlan_seq_ops = {
 	.start = irlan_seq_start,
 	.next  = irlan_seq_next,
 	.stop  = irlan_seq_stop,
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
index d93ebd1..2fc9f51 100644
--- a/net/irda/irlap.c
+++ b/net/irda/irlap.c
@@ -1210,7 +1210,7 @@ static int irlap_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations irlap_seq_ops = {
+static const struct seq_operations irlap_seq_ops = {
 	.start  = irlap_seq_start,
 	.next   = irlap_seq_next,
 	.stop   = irlap_seq_stop,
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 9df0461..24a5e3f 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -1994,7 +1994,7 @@ static int irlmp_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations irlmp_seq_ops = {
+static const struct seq_operations irlmp_seq_ops = {
 	.start  = irlmp_seq_start,
 	.next   = irlmp_seq_next,
 	.stop   = irlmp_seq_stop,
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index ce46475..7f50832 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -1875,7 +1875,7 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations irttp_seq_ops = {
+static const struct seq_operations irttp_seq_ops = {
 	.start  = irttp_seq_start,
 	.next   = irttp_seq_next,
 	.stop   = irttp_seq_stop,
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 3ab9d9f..49be6c9 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -184,14 +184,14 @@ out:
 	return 0;
 }
 
-static struct seq_operations llc_seq_socket_ops = {
+static const struct seq_operations llc_seq_socket_ops = {
 	.start  = llc_seq_start,
 	.next   = llc_seq_next,
 	.stop   = llc_seq_stop,
 	.show   = llc_seq_socket_show,
 };
 
-static struct seq_operations llc_seq_core_ops = {
+static const struct seq_operations llc_seq_core_ops = {
 	.start  = llc_seq_start,
 	.next   = llc_seq_next,
 	.stop   = llc_seq_stop,
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 513828f..2191fe0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -464,7 +464,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
 	return seq_putc(s, '\n');
 }
 
-static struct seq_operations exp_seq_ops = {
+static const struct seq_operations exp_seq_ops = {
 	.start = exp_seq_start,
 	.next = exp_seq_next,
 	.stop = exp_seq_stop,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 54498bc..ffb6ff8 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -185,7 +185,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	return 0;
 }
 
-static struct seq_operations ct_seq_ops = {
+static const struct seq_operations ct_seq_ops = {
 	.start = ct_seq_start,
 	.next  = ct_seq_next,
 	.stop  = ct_seq_stop,
@@ -289,7 +289,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ct_cpu_seq_ops = {
+static const struct seq_operations ct_cpu_seq_ops = {
 	.start	= ct_cpu_seq_start,
 	.next	= ct_cpu_seq_next,
 	.stop	= ct_cpu_seq_stop,
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 91b220c..9498579 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -140,7 +140,7 @@ static int seq_show(struct seq_file *s, void *v)
 	return seq_printf(s, "%2lld %s\n", *pos, logger->name);
 }
 
-static struct seq_operations nflog_seq_ops = {
+static const struct seq_operations nflog_seq_ops = {
 	.start	= seq_start,
 	.next	= seq_next,
 	.stop	= seq_stop,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 823fbf4..a481a34 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -330,7 +330,7 @@ static int seq_show(struct seq_file *s, void *v)
 	return ret;
 }
 
-static struct seq_operations nfqueue_seq_ops = {
+static const struct seq_operations nfqueue_seq_ops = {
 	.start	= seq_start,
 	.next	= seq_next,
 	.stop	= seq_stop,
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e32e30e..e185a5b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -962,7 +962,7 @@ static int seq_show(struct seq_file *s, void *v)
 			  inst->flushtimeout, atomic_read(&inst->use));
 }
 
-static struct seq_operations nful_seq_ops = {
+static const struct seq_operations nful_seq_ops = {
 	.start	= seq_start,
 	.next	= seq_next,
 	.stop	= seq_stop,
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7d47fc4..bb65a38 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1048,7 +1048,7 @@ static int seq_show(struct seq_file *s, void *v)
 			  atomic_read(&inst->use));
 }
 
-static struct seq_operations nfqnl_seq_ops = {
+static const struct seq_operations nfqnl_seq_ops = {
 	.start	= seq_start,
 	.next	= seq_next,
 	.stop	= seq_stop,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 520eddf..cc2baa6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -745,7 +745,7 @@ static int xt_name_seq_show(struct seq_file *seq, void *v)
 		return 0;
 }
 
-static struct seq_operations xt_tgt_seq_ops = {
+static const struct seq_operations xt_tgt_seq_ops = {
 	.start	= xt_tgt_seq_start,
 	.next	= xt_tgt_seq_next,
 	.stop	= xt_tgt_seq_stop,
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 5a6ea9b..d6b3d01 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -701,7 +701,7 @@ static int dl_seq_show(struct seq_file *s, void *v)
 	return 0;
 }
 
-static struct seq_operations dl_seq_ops = {
+static const struct seq_operations dl_seq_ops = {
 	.start = dl_seq_start,
 	.next  = dl_seq_next,
 	.stop  = dl_seq_stop,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1f15821..a3c8e69 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1713,7 +1713,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations netlink_seq_ops = {
+static const struct seq_operations netlink_seq_ops = {
 	.start  = netlink_seq_start,
 	.next   = netlink_seq_next,
 	.stop   = netlink_seq_stop,
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 5d4a26c..5d66490 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1328,7 +1328,7 @@ static int nr_info_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations nr_info_seqops = {
+static const struct seq_operations nr_info_seqops = {
 	.start = nr_info_start,
 	.next = nr_info_next,
 	.stop = nr_info_stop,
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 2f76e062..24fe4a6 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -922,7 +922,7 @@ static int nr_node_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations nr_node_seqops = {
+static const struct seq_operations nr_node_seqops = {
 	.start = nr_node_start,
 	.next = nr_node_next,
 	.stop = nr_node_stop,
@@ -1006,7 +1006,7 @@ static int nr_neigh_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations nr_neigh_seqops = {
+static const struct seq_operations nr_neigh_seqops = {
 	.start = nr_neigh_start,
 	.next = nr_neigh_next,
 	.stop = nr_neigh_stop,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f8b8301..7c27bd3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1928,7 +1928,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations packet_seq_ops = {
+static const struct seq_operations packet_seq_ops = {
 	.start	= packet_seq_start,
 	.next	= packet_seq_next,
 	.stop	= packet_seq_stop,
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index d476c43..f4d3aba 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1454,7 +1454,7 @@ static int rose_info_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rose_info_seqops = {
+static const struct seq_operations rose_info_seqops = {
 	.start = rose_info_start,
 	.next = rose_info_next,
 	.stop = rose_info_stop,
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 929a784..bbcbad1 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -1118,7 +1118,7 @@ static int rose_node_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rose_node_seqops = {
+static const struct seq_operations rose_node_seqops = {
 	.start = rose_node_start,
 	.next = rose_node_next,
 	.stop = rose_node_stop,
@@ -1200,7 +1200,7 @@ static int rose_neigh_show(struct seq_file *seq, void *v)
 }
 
 
-static struct seq_operations rose_neigh_seqops = {
+static const struct seq_operations rose_neigh_seqops = {
 	.start = rose_neigh_start,
 	.next = rose_neigh_next,
 	.stop = rose_neigh_stop,
@@ -1284,7 +1284,7 @@ static int rose_route_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rose_route_seqops = {
+static const struct seq_operations rose_route_seqops = {
 	.start = rose_route_start,
 	.next = rose_route_next,
 	.stop = rose_route_stop,
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
index 77cc5fb..2e83ce3 100644
--- a/net/rxrpc/ar-proc.c
+++ b/net/rxrpc/ar-proc.c
@@ -86,7 +86,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rxrpc_call_seq_ops = {
+static const struct seq_operations rxrpc_call_seq_ops = {
 	.start  = rxrpc_call_seq_start,
 	.next   = rxrpc_call_seq_next,
 	.stop   = rxrpc_call_seq_stop,
@@ -170,7 +170,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rxrpc_connection_seq_ops = {
+static const struct seq_operations rxrpc_connection_seq_ops = {
 	.start  = rxrpc_connection_seq_start,
 	.next   = rxrpc_connection_seq_next,
 	.stop   = rxrpc_connection_seq_stop,
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 2f12bf2..e4cd841 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -250,7 +250,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations sctp_eps_ops = {
+static const struct seq_operations sctp_eps_ops = {
 	.start = sctp_eps_seq_start,
 	.next  = sctp_eps_seq_next,
 	.stop  = sctp_eps_seq_stop,
@@ -361,7 +361,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations sctp_assoc_ops = {
+static const struct seq_operations sctp_assoc_ops = {
 	.start = sctp_assocs_seq_start,
 	.next  = sctp_assocs_seq_next,
 	.stop  = sctp_assocs_seq_stop,
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 543b085..01c3c41 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1210,7 +1210,7 @@ static int c_show(struct seq_file *m, void *p)
 	return cd->cache_show(m, cd, cp);
 }
 
-static struct seq_operations cache_content_op = {
+static const struct seq_operations cache_content_op = {
 	.start	= c_start,
 	.next	= c_next,
 	.stop	= c_stop,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d70fa30..3654b64 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2048,7 +2048,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations unix_seq_ops = {
+static const struct seq_operations unix_seq_ops = {
 	.start  = unix_seq_start,
 	.next   = unix_seq_next,
 	.stop   = unix_seq_stop,
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index 2051065..236e7ea 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -164,14 +164,14 @@ static int status_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations config_op = {
+static const struct seq_operations config_op = {
 	.start	= r_start,
 	.next	= r_next,
 	.stop	= r_stop,
 	.show	= config_show,
 };
 
-static struct seq_operations status_op = {
+static const struct seq_operations status_op = {
 	.start	= r_start,
 	.next	= r_next,
 	.stop	= r_stop,
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 96001f0..7405b9c 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -234,21 +234,21 @@ out:
 	return 0;
 }
 
-static struct seq_operations x25_seq_route_ops = {
+static const struct seq_operations x25_seq_route_ops = {
 	.start  = x25_seq_route_start,
 	.next   = x25_seq_route_next,
 	.stop   = x25_seq_route_stop,
 	.show   = x25_seq_route_show,
 };
 
-static struct seq_operations x25_seq_socket_ops = {
+static const struct seq_operations x25_seq_socket_ops = {
 	.start  = x25_seq_socket_start,
 	.next   = x25_seq_socket_next,
 	.stop   = x25_seq_socket_stop,
 	.show   = x25_seq_socket_show,
 };
 
-static struct seq_operations x25_seq_forward_ops = {
+static const struct seq_operations x25_seq_forward_ops = {
 	.start  = x25_seq_forward_start,
 	.next   = x25_seq_forward_next,
 	.stop   = x25_seq_forward_stop,
-- 
cgit v0.10.2


From 99d24edeb6abc6ca3a0d0fbdb83c664c04403c8c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 10 Jul 2007 23:24:52 -0700
Subject: [NETFILTER]: {ip, nf}_conntrack_sctp: fix remotely triggerable NULL
 ptr dereference (CVE-2007-2876)

When creating a new connection by sending an unknown chunk type, we
don't transition to a valid state, causing a NULL pointer dereference
in sctp_packet when accessing sctp_timeouts[SCTP_CONNTRACK_NONE].

Fix by not creating a new conntrack entry if the initial state is invalid.

Noticed by Vilmos Nebehaj <vilmos.nebehaj@ramsys.hu>

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 265769e..debfe61 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -431,7 +431,8 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 					 SCTP_CONNTRACK_NONE, sch->type);
 
 		/* Invalid: delete conntrack */
-		if (newconntrack == SCTP_CONNTRACK_MAX) {
+		if (newconntrack == SCTP_CONNTRACK_NONE ||
+		    newconntrack == SCTP_CONNTRACK_MAX) {
 			pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
 			return 0;
 		}
-- 
cgit v0.10.2


From 1fd05ba5a2f2aa8e7b9b52ef55df850e2e7d54c9 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 11 Jul 2007 14:22:39 -0700
Subject: [AF_UNIX]: Rewrite garbage collector, fixes race.

Throw out the old mark & sweep garbage collector and put in a
refcounting cycle detecting one.

The old one had a race with recvmsg that resulted in false positives
and hence data loss.  The old algorithm operated on all unix sockets
in the system, so any additional locking would have meant performance
problems for all users of these.

The new algorithm instead only operates on "in flight" sockets, which
are very rare, and the additional locking for these doesn't negatively
impact the vast majority of users.

In fact, it's probable that there weren't *any* heavy senders of
sockets over sockets; otherwise the above race would have been
discovered long ago.

The patch works OK with the app that exposed the race with the old
code.  The garbage collection has also been verified to work in a few
simple cases.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 65f49fd..6de1e9e 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -79,9 +79,10 @@ struct unix_sock {
 	struct mutex		readlock;
         struct sock		*peer;
         struct sock		*other;
-        struct sock		*gc_tree;
+	struct list_head	link;
         atomic_t                inflight;
         spinlock_t		lock;
+	unsigned int		gc_candidate : 1;
         wait_queue_head_t       peer_wait;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3654b64..65ebccc 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -592,7 +592,8 @@ static struct sock * unix_create1(struct socket *sock)
 	u->dentry = NULL;
 	u->mnt	  = NULL;
 	spin_lock_init(&u->lock);
-	atomic_set(&u->inflight, sock ? 0 : -1);
+	atomic_set(&u->inflight, 0);
+	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->readlock); /* single task reading lock */
 	init_waitqueue_head(&u->peer_wait);
 	unix_insert_socket(unix_sockets_unbound, sk);
@@ -1134,9 +1135,6 @@ restart:
 	/* take ten and and send info to listening sock */
 	spin_lock(&other->sk_receive_queue.lock);
 	__skb_queue_tail(&other->sk_receive_queue, skb);
-	/* Undo artificially decreased inflight after embrion
-	 * is installed to listening socket. */
-	atomic_inc(&newu->inflight);
 	spin_unlock(&other->sk_receive_queue.lock);
 	unix_state_unlock(other);
 	other->sk_data_ready(other, 0);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f20b7ea..406b643 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -62,6 +62,10 @@
  *	AV		1 Mar 1999
  *		Damn. Added missing check for ->dead in listen queues scanning.
  *
+ *	Miklos Szeredi 25 Jun 2007
+ *		Reimplement with a cycle collecting algorithm. This should
+ *		solve several problems with the previous code, like being racy
+ *		wrt receive and holding up unrelated socket operations.
  */
 
 #include <linux/kernel.h>
@@ -84,10 +88,9 @@
 
 /* Internal data structures and random procedures: */
 
-#define GC_HEAD		((struct sock *)(-1))
-#define GC_ORPHAN	((struct sock *)(-3))
-
-static struct sock *gc_current = GC_HEAD; /* stack of objects to mark */
+static LIST_HEAD(gc_inflight_list);
+static LIST_HEAD(gc_candidates);
+static DEFINE_SPINLOCK(unix_gc_lock);
 
 atomic_t unix_tot_inflight = ATOMIC_INIT(0);
 
@@ -122,8 +125,16 @@ void unix_inflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 	if(s) {
-		atomic_inc(&unix_sk(s)->inflight);
+		struct unix_sock *u = unix_sk(s);
+		spin_lock(&unix_gc_lock);
+		if (atomic_inc_return(&u->inflight) == 1) {
+			BUG_ON(!list_empty(&u->link));
+			list_add_tail(&u->link, &gc_inflight_list);
+		} else {
+			BUG_ON(list_empty(&u->link));
+		}
 		atomic_inc(&unix_tot_inflight);
+		spin_unlock(&unix_gc_lock);
 	}
 }
 
@@ -131,182 +142,218 @@ void unix_notinflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 	if(s) {
-		atomic_dec(&unix_sk(s)->inflight);
+		struct unix_sock *u = unix_sk(s);
+		spin_lock(&unix_gc_lock);
+		BUG_ON(list_empty(&u->link));
+		if (atomic_dec_and_test(&u->inflight))
+			list_del_init(&u->link);
 		atomic_dec(&unix_tot_inflight);
+		spin_unlock(&unix_gc_lock);
 	}
 }
 
+static inline struct sk_buff *sock_queue_head(struct sock *sk)
+{
+	return (struct sk_buff *) &sk->sk_receive_queue;
+}
 
-/*
- *	Garbage Collector Support Functions
- */
+#define receive_queue_for_each_skb(sk, next, skb) \
+	for (skb = sock_queue_head(sk)->next, next = skb->next; \
+	     skb != sock_queue_head(sk); skb = next, next = skb->next)
 
-static inline struct sock *pop_stack(void)
+static void scan_inflight(struct sock *x, void (*func)(struct sock *),
+			  struct sk_buff_head *hitlist)
 {
-	struct sock *p = gc_current;
-	gc_current = unix_sk(p)->gc_tree;
-	return p;
+	struct sk_buff *skb;
+	struct sk_buff *next;
+
+	spin_lock(&x->sk_receive_queue.lock);
+	receive_queue_for_each_skb(x, next, skb) {
+		/*
+		 *	Do we have file descriptors ?
+		 */
+		if (UNIXCB(skb).fp) {
+			bool hit = false;
+			/*
+			 *	Process the descriptors of this socket
+			 */
+			int nfd = UNIXCB(skb).fp->count;
+			struct file **fp = UNIXCB(skb).fp->fp;
+			while (nfd--) {
+				/*
+				 *	Get the socket the fd matches
+				 *	if it indeed does so
+				 */
+				struct sock *sk = unix_get_socket(*fp++);
+				if(sk) {
+					hit = true;
+					func(sk);
+				}
+			}
+			if (hit && hitlist != NULL) {
+				__skb_unlink(skb, &x->sk_receive_queue);
+				__skb_queue_tail(hitlist, skb);
+			}
+		}
+	}
+	spin_unlock(&x->sk_receive_queue.lock);
 }
 
-static inline int empty_stack(void)
+static void scan_children(struct sock *x, void (*func)(struct sock *),
+			  struct sk_buff_head *hitlist)
 {
-	return gc_current == GC_HEAD;
+	if (x->sk_state != TCP_LISTEN)
+		scan_inflight(x, func, hitlist);
+	else {
+		struct sk_buff *skb;
+		struct sk_buff *next;
+		struct unix_sock *u;
+		LIST_HEAD(embryos);
+
+		/*
+		 * For a listening socket collect the queued embryos
+		 * and perform a scan on them as well.
+		 */
+		spin_lock(&x->sk_receive_queue.lock);
+		receive_queue_for_each_skb(x, next, skb) {
+			u = unix_sk(skb->sk);
+
+			/*
+			 * An embryo cannot be in-flight, so it's safe
+			 * to use the list link.
+			 */
+			BUG_ON(!list_empty(&u->link));
+			list_add_tail(&u->link, &embryos);
+		}
+		spin_unlock(&x->sk_receive_queue.lock);
+
+		while (!list_empty(&embryos)) {
+			u = list_entry(embryos.next, struct unix_sock, link);
+			scan_inflight(&u->sk, func, hitlist);
+			list_del_init(&u->link);
+		}
+	}
 }
 
-static void maybe_unmark_and_push(struct sock *x)
+static void dec_inflight(struct sock *sk)
 {
-	struct unix_sock *u = unix_sk(x);
+	atomic_dec(&unix_sk(sk)->inflight);
+}
 
-	if (u->gc_tree != GC_ORPHAN)
-		return;
-	sock_hold(x);
-	u->gc_tree = gc_current;
-	gc_current = x;
+static void inc_inflight(struct sock *sk)
+{
+	atomic_inc(&unix_sk(sk)->inflight);
 }
 
+static void inc_inflight_move_tail(struct sock *sk)
+{
+	struct unix_sock *u = unix_sk(sk);
+
+	atomic_inc(&u->inflight);
+	/*
+	 * If this is still a candidate, move it to the end of the
+	 * list, so that it's checked even if it was already passed
+	 * over
+	 */
+	if (u->gc_candidate)
+		list_move_tail(&u->link, &gc_candidates);
+}
 
 /* The external entry point: unix_gc() */
 
 void unix_gc(void)
 {
-	static DEFINE_MUTEX(unix_gc_sem);
-	int i;
-	struct sock *s;
-	struct sk_buff_head hitlist;
-	struct sk_buff *skb;
+	static bool gc_in_progress = false;
 
-	/*
-	 *	Avoid a recursive GC.
-	 */
+	struct unix_sock *u;
+	struct unix_sock *next;
+	struct sk_buff_head hitlist;
+	struct list_head cursor;
 
-	if (!mutex_trylock(&unix_gc_sem))
-		return;
+	spin_lock(&unix_gc_lock);
 
-	spin_lock(&unix_table_lock);
+	/* Avoid a recursive GC. */
+	if (gc_in_progress)
+		goto out;
 
-	forall_unix_sockets(i, s)
-	{
-		unix_sk(s)->gc_tree = GC_ORPHAN;
-	}
+	gc_in_progress = true;
 	/*
-	 *	Everything is now marked
-	 */
-
-	/* Invariant to be maintained:
-		- everything unmarked is either:
-		-- (a) on the stack, or
-		-- (b) has all of its children unmarked
-		- everything on the stack is always unmarked
-		- nothing is ever pushed onto the stack twice, because:
-		-- nothing previously unmarked is ever pushed on the stack
+	 * First, select candidates for garbage collection.  Only
+	 * in-flight sockets are considered, and from those only ones
+	 * which don't have any external reference.
+	 *
+	 * Holding unix_gc_lock will protect these candidates from
+	 * being detached, and hence from gaining an external
+	 * reference.  This also means, that since there are no
+	 * possible receivers, the receive queues of these sockets are
+	 * static during the GC, even though the dequeue is done
+	 * before the detach without atomicity guarantees.
 	 */
+	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+		int total_refs;
+		int inflight_refs;
+
+		total_refs = file_count(u->sk.sk_socket->file);
+		inflight_refs = atomic_read(&u->inflight);
+
+		BUG_ON(inflight_refs < 1);
+		BUG_ON(total_refs < inflight_refs);
+		if (total_refs == inflight_refs) {
+			list_move_tail(&u->link, &gc_candidates);
+			u->gc_candidate = 1;
+		}
+	}
 
 	/*
-	 *	Push root set
+	 * Now remove all internal in-flight reference to children of
+	 * the candidates.
 	 */
-
-	forall_unix_sockets(i, s)
-	{
-		int open_count = 0;
-
-		/*
-		 *	If all instances of the descriptor are not
-		 *	in flight we are in use.
-		 *
-		 *	Special case: when socket s is embrion, it may be
-		 *	hashed but still not in queue of listening socket.
-		 *	In this case (see unix_create1()) we set artificial
-		 *	negative inflight counter to close race window.
-		 *	It is trick of course and dirty one.
-		 */
-		if (s->sk_socket && s->sk_socket->file)
-			open_count = file_count(s->sk_socket->file);
-		if (open_count > atomic_read(&unix_sk(s)->inflight))
-			maybe_unmark_and_push(s);
-	}
+	list_for_each_entry(u, &gc_candidates, link)
+		scan_children(&u->sk, dec_inflight, NULL);
 
 	/*
-	 *	Mark phase
+	 * Restore the references for children of all candidates,
+	 * which have remaining references.  Do this recursively, so
+	 * only those remain, which form cyclic references.
+	 *
+	 * Use a "cursor" link, to make the list traversal safe, even
+	 * though elements might be moved about.
 	 */
+	list_add(&cursor, &gc_candidates);
+	while (cursor.next != &gc_candidates) {
+		u = list_entry(cursor.next, struct unix_sock, link);
 
-	while (!empty_stack())
-	{
-		struct sock *x = pop_stack();
-		struct sock *sk;
-
-		spin_lock(&x->sk_receive_queue.lock);
-		skb = skb_peek(&x->sk_receive_queue);
-
-		/*
-		 *	Loop through all but first born
-		 */
+		/* Move cursor to after the current position. */
+		list_move(&cursor, &u->link);
 
-		while (skb && skb != (struct sk_buff *)&x->sk_receive_queue) {
-			/*
-			 *	Do we have file descriptors ?
-			 */
-			if(UNIXCB(skb).fp)
-			{
-				/*
-				 *	Process the descriptors of this socket
-				 */
-				int nfd=UNIXCB(skb).fp->count;
-				struct file **fp = UNIXCB(skb).fp->fp;
-				while(nfd--)
-				{
-					/*
-					 *	Get the socket the fd matches if
-					 *	it indeed does so
-					 */
-					if((sk=unix_get_socket(*fp++))!=NULL)
-					{
-						maybe_unmark_and_push(sk);
-					}
-				}
-			}
-			/* We have to scan not-yet-accepted ones too */
-			if (x->sk_state == TCP_LISTEN)
-				maybe_unmark_and_push(skb->sk);
-			skb=skb->next;
+		if (atomic_read(&u->inflight) > 0) {
+			list_move_tail(&u->link, &gc_inflight_list);
+			u->gc_candidate = 0;
+			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
-		spin_unlock(&x->sk_receive_queue.lock);
-		sock_put(x);
 	}
+	list_del(&cursor);
 
+	/*
+	 * Now gc_candidates contains only garbage.  Restore original
+	 * inflight counters for these as well, and remove the skbuffs
+	 * which are creating the cycle(s).
+	 */
 	skb_queue_head_init(&hitlist);
+	list_for_each_entry(u, &gc_candidates, link)
+		scan_children(&u->sk, inc_inflight, &hitlist);
 
-	forall_unix_sockets(i, s)
-	{
-		struct unix_sock *u = unix_sk(s);
+	spin_unlock(&unix_gc_lock);
 
-		if (u->gc_tree == GC_ORPHAN) {
-			struct sk_buff *nextsk;
+	/* Here we are. Hitlist is filled. Die. */
+	__skb_queue_purge(&hitlist);
 
-			spin_lock(&s->sk_receive_queue.lock);
-			skb = skb_peek(&s->sk_receive_queue);
-			while (skb &&
-			       skb != (struct sk_buff *)&s->sk_receive_queue) {
-				nextsk = skb->next;
-				/*
-				 *	Do we have file descriptors ?
-				 */
-				if (UNIXCB(skb).fp) {
-					__skb_unlink(skb,
-						     &s->sk_receive_queue);
-					__skb_queue_tail(&hitlist, skb);
-				}
-				skb = nextsk;
-			}
-			spin_unlock(&s->sk_receive_queue.lock);
-		}
-		u->gc_tree = GC_ORPHAN;
-	}
-	spin_unlock(&unix_table_lock);
+	spin_lock(&unix_gc_lock);
 
-	/*
-	 *	Here we are. Hitlist is filled. Die.
-	 */
+	/* All candidates should have been detached by now. */
+	BUG_ON(!list_empty(&gc_candidates));
+	gc_in_progress = false;
 
-	__skb_queue_purge(&hitlist);
-	mutex_unlock(&unix_gc_sem);
+ out:
+	spin_unlock(&unix_gc_lock);
 }
-- 
cgit v0.10.2


From 29578624e354f56143d92510fff33a8b2aaa2c03 Mon Sep 17 00:00:00 2001
From: Olaf Kirch <olaf.kirch@oracle.com>
Date: Wed, 11 Jul 2007 19:32:02 -0700
Subject: [NET]: Fix races in net_rx_action vs netpoll.

Keep netpoll/poll_napi from messing with the poll_list.
Only net_rx_action is allowed to manipulate the list.

Signed-off-by: Olaf Kirch <olaf.kirch@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8590d68..79cc3da 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -261,6 +261,8 @@ enum netdev_state_t
 	__LINK_STATE_LINKWATCH_PENDING,
 	__LINK_STATE_DORMANT,
 	__LINK_STATE_QDISC_RUNNING,
+	/* Set by the netpoll NAPI code */
+	__LINK_STATE_POLL_LIST_FROZEN,
 };
 
 
@@ -1014,6 +1016,14 @@ static inline void netif_rx_complete(struct net_device *dev)
 {
 	unsigned long flags;
 
+#ifdef CONFIG_NETPOLL
+	/* Prevent race with netpoll - yes, this is a kludge.
+	 * But at least it doesn't penalize the non-netpoll
+	 * code path. */
+	if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state))
+		return;
+#endif
+
 	local_irq_save(flags);
 	__netif_rx_complete(dev);
 	local_irq_restore(flags);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index de1b26a..d1264e9 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -124,6 +124,13 @@ static void poll_napi(struct netpoll *np)
 	if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
 	    npinfo->poll_owner != smp_processor_id() &&
 	    spin_trylock(&npinfo->poll_lock)) {
+		/* When calling dev->poll from poll_napi, we may end up in
+		 * netif_rx_complete. However, only the CPU to which the
+		 * device was queued is allowed to remove it from poll_list.
+		 * Setting POLL_LIST_FROZEN tells netif_rx_complete
+		 * to leave the NAPI state alone.
+		 */
+		set_bit(__LINK_STATE_POLL_LIST_FROZEN, &np->dev->state);
 		npinfo->rx_flags |= NETPOLL_RX_DROP;
 		atomic_inc(&trapped);
 
@@ -131,6 +138,7 @@ static void poll_napi(struct netpoll *np)
 
 		atomic_dec(&trapped);
 		npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+		clear_bit(__LINK_STATE_POLL_LIST_FROZEN, &np->dev->state);
 		spin_unlock(&npinfo->poll_lock);
 	}
 }
-- 
cgit v0.10.2


From 71bffe556c59a7865bf0b1ecd94530f1e296cdb0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 11 Jul 2007 19:41:18 -0700
Subject: [ETH]: Validate address in eth_mac_addr

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 1387e54..12c7657 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -266,8 +266,11 @@ void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev,
 static int eth_mac_addr(struct net_device *dev, void *p)
 {
 	struct sockaddr *addr = p;
+
 	if (netif_running(dev))
 		return -EBUSY;
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
 	return 0;
 }
-- 
cgit v0.10.2


From 8c979c26a0f093c13290320edda799d8335e50ae Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 11 Jul 2007 19:45:24 -0700
Subject: [VLAN]: Fix MAC address handling

The VLAN MAC address handling is broken in multiple ways. When the address
differs when setting it, the real device is put in promiscuous mode twice,
but never taken out again. Additionally it doesn't resync when the real
device's address is changed and needlessly puts it in promiscuous mode when
the vlan device is still down.

Fix by moving address handling to vlan_dev_open/vlan_dev_stop and properly
deal with address changes in the device notifier. Also switch to
dev_unicast_add (which needs the exact same handling).

Since the set_mac_address handler is identical to the generic ethernet one
with these changes, kill it and use ether_setup().

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index c791287..61a57dc 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -135,6 +135,7 @@ struct vlan_dev_info {
 	int old_allmulti;               /* similar to above. */
 	int old_promiscuity;            /* similar to above. */
 	struct net_device *real_dev;    /* the underlying device/interface */
+	unsigned char real_dev_addr[ETH_ALEN];
 	struct proc_dir_entry *dent;    /* Holds the proc data */
 	unsigned long cnt_inc_headroom_on_tx; /* How many times did we have to grow the skb on TX. */
 	unsigned long cnt_encap_on_xmit;      /* How many times did we have to encapsulate the skb on TX. */
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index e7583ee..b463ba4 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -345,12 +345,8 @@ static int vlan_dev_init(struct net_device *dev)
 					  (1<<__LINK_STATE_DORMANT))) |
 		      (1<<__LINK_STATE_PRESENT);
 
-	/* TODO: maybe just assign it to be ETHERNET? */
-	dev->type = real_dev->type;
-
 	memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len);
 	memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len);
-	dev->addr_len = real_dev->addr_len;
 
 	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
 		dev->hard_header     = real_dev->hard_header;
@@ -364,6 +360,7 @@ static int vlan_dev_init(struct net_device *dev)
 		dev->rebuild_header  = vlan_dev_rebuild_header;
 	}
 	dev->hard_header_parse = real_dev->hard_header_parse;
+	dev->hard_header_cache = NULL;
 
 	lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
 	return 0;
@@ -373,6 +370,8 @@ void vlan_setup(struct net_device *new_dev)
 {
 	SET_MODULE_OWNER(new_dev);
 
+	ether_setup(new_dev);
+
 	/* new_dev->ifindex = 0;  it will be set when added to
 	 * the global list.
 	 * iflink is set as well.
@@ -392,7 +391,6 @@ void vlan_setup(struct net_device *new_dev)
 	new_dev->init = vlan_dev_init;
 	new_dev->open = vlan_dev_open;
 	new_dev->stop = vlan_dev_stop;
-	new_dev->set_mac_address = vlan_dev_set_mac_address;
 	new_dev->set_multicast_list = vlan_dev_set_multicast_list;
 	new_dev->destructor = free_netdev;
 	new_dev->do_ioctl = vlan_dev_ioctl;
@@ -592,6 +590,30 @@ out_free_newdev:
 	return err;
 }
 
+static void vlan_sync_address(struct net_device *dev,
+			      struct net_device *vlandev)
+{
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(vlandev);
+
+	/* May be called without an actual change */
+	if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr))
+		return;
+
+	/* vlan address was different from the old address and is equal to
+	 * the new address */
+	if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
+	    !compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
+		dev_unicast_delete(dev, vlandev->dev_addr, ETH_ALEN);
+
+	/* vlan address was equal to the old address and is different from
+	 * the new address */
+	if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
+	    compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
+		dev_unicast_add(dev, vlandev->dev_addr, ETH_ALEN);
+
+	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
+}
+
 static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
@@ -618,6 +640,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		}
 		break;
 
+	case NETDEV_CHANGEADDR:
+		/* Adjust unicast filters on underlying device */
+		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+			vlandev = vlan_group_get_device(grp, i);
+			if (!vlandev)
+				continue;
+
+			vlan_sync_address(dev, vlandev);
+		}
+		break;
+
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too.  */
 		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index fe6bb0f..62ce1c5 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -58,7 +58,6 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int vlan_dev_change_mtu(struct net_device *dev, int new_mtu);
-int vlan_dev_set_mac_address(struct net_device *dev, void* addr);
 int vlan_dev_open(struct net_device* dev);
 int vlan_dev_stop(struct net_device* dev);
 int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 95afe38..d4a62d1 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -612,44 +612,6 @@ void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
 	*result = VLAN_DEV_INFO(dev)->vlan_id;
 }
 
-int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p)
-{
-	struct sockaddr *addr = (struct sockaddr *)(addr_struct_p);
-	int i;
-
-	if (netif_running(dev))
-		return -EBUSY;
-
-	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
-
-	printk("%s: Setting MAC address to ", dev->name);
-	for (i = 0; i < 6; i++)
-		printk(" %2.2x", dev->dev_addr[i]);
-	printk(".\n");
-
-	if (memcmp(VLAN_DEV_INFO(dev)->real_dev->dev_addr,
-		   dev->dev_addr,
-		   dev->addr_len) != 0) {
-		if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_PROMISC)) {
-			int flgs = VLAN_DEV_INFO(dev)->real_dev->flags;
-
-			/* Increment our in-use promiscuity counter */
-			dev_set_promiscuity(VLAN_DEV_INFO(dev)->real_dev, 1);
-
-			/* Make PROMISC visible to the user. */
-			flgs |= IFF_PROMISC;
-			printk("VLAN (%s):  Setting underlying device (%s) to promiscious mode.\n",
-			       dev->name, VLAN_DEV_INFO(dev)->real_dev->name);
-			dev_change_flags(VLAN_DEV_INFO(dev)->real_dev, flgs);
-		}
-	} else {
-		printk("VLAN (%s):  Underlying device (%s) has same MAC, not checking promiscious mode.\n",
-		       dev->name, VLAN_DEV_INFO(dev)->real_dev->name);
-	}
-
-	return 0;
-}
-
 static inline int vlan_dmi_equals(struct dev_mc_list *dmi1,
 				  struct dev_mc_list *dmi2)
 {
@@ -736,15 +698,32 @@ static void vlan_flush_mc_list(struct net_device *dev)
 
 int vlan_dev_open(struct net_device *dev)
 {
-	if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_UP))
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct net_device *real_dev = vlan->real_dev;
+	int err;
+
+	if (!(real_dev->flags & IFF_UP))
 		return -ENETDOWN;
 
+	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
+		err = dev_unicast_add(real_dev, dev->dev_addr, ETH_ALEN);
+		if (err < 0)
+			return err;
+	}
+	memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN);
+
 	return 0;
 }
 
 int vlan_dev_stop(struct net_device *dev)
 {
+	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+
 	vlan_flush_mc_list(dev);
+
+	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
+		dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 2d85cba2b272a5201a60966a65a4f8c0bcc0bb71 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 11 Jul 2007 19:42:13 -0700
Subject: [RTNETLINK]: rtnl_link API simplification

All drivers need to unregister their devices in the module unload function.
While doing so they must hold the rtnl and atomically unregister the
rtnl_link ops as well. This makes the rtnl_link_unregister function that
takes the rtnl itself completely useless.

Provide default newlink/dellink functions, make __rtnl_link_unregister and
rtnl_link_unregister unregister all devices with matching rtnl_link_ops and
change the existing users to take advantage of that.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 91126b9..373ff70 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -37,11 +37,6 @@
 #include <linux/rtnetlink.h>
 #include <net/rtnetlink.h>
 
-struct dummy_priv {
-	struct net_device *dev;
-	struct list_head list;
-};
-
 static int numdummies = 1;
 
 static int dummy_xmit(struct sk_buff *skb, struct net_device *dev);
@@ -89,37 +84,9 @@ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static LIST_HEAD(dummies);
-
-static int dummy_newlink(struct net_device *dev,
-			 struct nlattr *tb[], struct nlattr *data[])
-{
-	struct dummy_priv *priv = netdev_priv(dev);
-	int err;
-
-	err = register_netdevice(dev);
-	if (err < 0)
-		return err;
-
-	priv->dev = dev;
-	list_add_tail(&priv->list, &dummies);
-	return 0;
-}
-
-static void dummy_dellink(struct net_device *dev)
-{
-	struct dummy_priv *priv = netdev_priv(dev);
-
-	list_del(&priv->list);
-	unregister_netdevice(dev);
-}
-
 static struct rtnl_link_ops dummy_link_ops __read_mostly = {
 	.kind		= "dummy",
-	.priv_size	= sizeof(struct dummy_priv),
 	.setup		= dummy_setup,
-	.newlink	= dummy_newlink,
-	.dellink	= dummy_dellink,
 };
 
 /* Number of dummy devices to be set up by this module. */
@@ -129,12 +96,9 @@ MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices");
 static int __init dummy_init_one(void)
 {
 	struct net_device *dev_dummy;
-	struct dummy_priv *priv;
 	int err;
 
-	dev_dummy = alloc_netdev(sizeof(struct dummy_priv), "dummy%d",
-				 dummy_setup);
-
+	dev_dummy = alloc_netdev(0, "dummy%d", dummy_setup);
 	if (!dev_dummy)
 		return -ENOMEM;
 
@@ -146,10 +110,6 @@ static int __init dummy_init_one(void)
 	err = register_netdevice(dev_dummy);
 	if (err < 0)
 		goto err;
-
-	priv = netdev_priv(dev_dummy);
-	priv->dev = dev_dummy;
-	list_add_tail(&priv->list, &dummies);
 	return 0;
 
 err:
@@ -159,7 +119,6 @@ err:
 
 static int __init dummy_init_module(void)
 {
-	struct dummy_priv *priv, *next;
 	int i, err = 0;
 
 	rtnl_lock();
@@ -167,11 +126,8 @@ static int __init dummy_init_module(void)
 
 	for (i = 0; i < numdummies && !err; i++)
 		err = dummy_init_one();
-	if (err < 0) {
-		list_for_each_entry_safe(priv, next, &dummies, list)
-			dummy_dellink(priv->dev);
+	if (err < 0)
 		__rtnl_link_unregister(&dummy_link_ops);
-	}
 	rtnl_unlock();
 
 	return err;
@@ -179,14 +135,7 @@ static int __init dummy_init_module(void)
 
 static void __exit dummy_cleanup_module(void)
 {
-	struct dummy_priv *priv, *next;
-
-	rtnl_lock();
-	list_for_each_entry_safe(priv, next, &dummies, list)
-		dummy_dellink(priv->dev);
-
-	__rtnl_link_unregister(&dummy_link_ops);
-	rtnl_unlock();
+	rtnl_link_unregister(&dummy_link_ops);
 }
 
 module_init(dummy_init_module);
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 669ee1a..c8e7c8f 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -33,15 +33,12 @@
 #include <linux/etherdevice.h>
 #include <linux/init.h>
 #include <linux/moduleparam.h>
-#include <linux/list.h>
 #include <net/pkt_sched.h>
 
 #define TX_TIMEOUT  (2*HZ)
 
 #define TX_Q_LIMIT    32
 struct ifb_private {
-	struct list_head	list;
-	struct net_device	*dev;
 	struct net_device_stats stats;
 	struct tasklet_struct   ifb_tasklet;
 	int     tasklet_pending;
@@ -201,12 +198,6 @@ static struct net_device_stats *ifb_get_stats(struct net_device *dev)
 	return stats;
 }
 
-static LIST_HEAD(ifbs);
-
-/* Number of ifb devices to be set up by this module. */
-module_param(numifbs, int, 0);
-MODULE_PARM_DESC(numifbs, "Number of ifb devices");
-
 static int ifb_close(struct net_device *dev)
 {
 	struct ifb_private *dp = netdev_priv(dev);
@@ -230,41 +221,19 @@ static int ifb_open(struct net_device *dev)
 	return 0;
 }
 
-static int ifb_newlink(struct net_device *dev,
-		       struct nlattr *tb[], struct nlattr *data[])
-{
-	struct ifb_private *priv = netdev_priv(dev);
-	int err;
-
-	err = register_netdevice(dev);
-	if (err < 0)
-		return err;
-
-	priv->dev = dev;
-	list_add_tail(&priv->list, &ifbs);
-	return 0;
-}
-
-static void ifb_dellink(struct net_device *dev)
-{
-	struct ifb_private *priv = netdev_priv(dev);
-
-	list_del(&priv->list);
-	unregister_netdevice(dev);
-}
-
 static struct rtnl_link_ops ifb_link_ops __read_mostly = {
 	.kind		= "ifb",
 	.priv_size	= sizeof(struct ifb_private),
 	.setup		= ifb_setup,
-	.newlink	= ifb_newlink,
-	.dellink	= ifb_dellink,
 };
 
+/* Number of ifb devices to be set up by this module. */
+module_param(numifbs, int, 0);
+MODULE_PARM_DESC(numifbs, "Number of ifb devices");
+
 static int __init ifb_init_one(int index)
 {
 	struct net_device *dev_ifb;
-	struct ifb_private *priv;
 	int err;
 
 	dev_ifb = alloc_netdev(sizeof(struct ifb_private),
@@ -281,10 +250,6 @@ static int __init ifb_init_one(int index)
 	err = register_netdevice(dev_ifb);
 	if (err < 0)
 		goto err;
-
-	priv = netdev_priv(dev_ifb);
-	priv->dev = dev_ifb;
-	list_add_tail(&priv->list, &ifbs);
 	return 0;
 
 err:
@@ -294,7 +259,6 @@ err:
 
 static int __init ifb_init_module(void)
 {
-	struct ifb_private *priv, *next;
 	int i, err;
 
 	rtnl_lock();
@@ -302,11 +266,8 @@ static int __init ifb_init_module(void)
 
 	for (i = 0; i < numifbs && !err; i++)
 		err = ifb_init_one(i);
-	if (err) {
-		list_for_each_entry_safe(priv, next, &ifbs, list)
-			ifb_dellink(priv->dev);
+	if (err)
 		__rtnl_link_unregister(&ifb_link_ops);
-	}
 	rtnl_unlock();
 
 	return err;
@@ -314,14 +275,7 @@ static int __init ifb_init_module(void)
 
 static void __exit ifb_cleanup_module(void)
 {
-	struct ifb_private *priv, *next;
-
-	rtnl_lock();
-	list_for_each_entry_safe(priv, next, &ifbs, list)
-		ifb_dellink(priv->dev);
-
-	__rtnl_link_unregister(&ifb_link_ops);
-	rtnl_unlock();
+	rtnl_link_unregister(&ifb_link_ops);
 }
 
 module_init(ifb_init_module);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index b463ba4..34c1d0b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -115,26 +115,6 @@ err1:
 	return err;
 }
 
-/* Cleanup all vlan devices
- * Note: devices that have been registered that but not
- * brought up will exist but have no module ref count.
- */
-static void __exit vlan_cleanup_devices(void)
-{
-	struct net_device *dev, *nxt;
-
-	rtnl_lock();
-	for_each_netdev_safe(dev, nxt) {
-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
-			unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
-					    VLAN_DEV_INFO(dev)->vlan_id);
-
-			unregister_netdevice(dev);
-		}
-	}
-	rtnl_unlock();
-}
-
 /*
  *     Module 'remove' entry point.
  *     o delete /proc/net/router directory and static entries.
@@ -150,7 +130,6 @@ static void __exit vlan_cleanup_module(void)
 	unregister_netdevice_notifier(&vlan_notifier_block);
 
 	dev_remove_pack(&vlan_packet_type);
-	vlan_cleanup_devices();
 
 	/* This table must be empty if there are no module
 	 * references left.
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 54c17e4..7b6b163 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -270,6 +270,9 @@ static LIST_HEAD(link_ops);
  */
 int __rtnl_link_register(struct rtnl_link_ops *ops)
 {
+	if (!ops->dellink)
+		ops->dellink = unregister_netdevice;
+
 	list_add_tail(&ops->list, &link_ops);
 	return 0;
 }
@@ -298,12 +301,16 @@ EXPORT_SYMBOL_GPL(rtnl_link_register);
  * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
  * @ops: struct rtnl_link_ops * to unregister
  *
- * The caller must hold the rtnl_mutex. This function should be used
- * by drivers that unregister devices during module unloading. It must
- * be called after unregistering the devices.
+ * The caller must hold the rtnl_mutex.
  */
 void __rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
+	struct net_device *dev, *n;
+
+	for_each_netdev_safe(dev, n) {
+		if (dev->rtnl_link_ops == ops)
+			ops->dellink(dev);
+	}
 	list_del(&ops->list);
 }
 
@@ -1067,7 +1074,10 @@ replay:
 		if (tb[IFLA_LINKMODE])
 			dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
 
-		err = ops->newlink(dev, tb, data);
+		if (ops->newlink)
+			err = ops->newlink(dev, tb, data);
+		else
+			err = register_netdevice(dev);
 err_free:
 		if (err < 0)
 			free_netdev(dev);
-- 
cgit v0.10.2


From 0e06877c6fdbc67b1132be895f995acd1ff30135 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 11 Jul 2007 19:42:31 -0700
Subject: [RTNETLINK]: rtnl_link: allow specifying initial device address

Drivers need to validate the initial addresses in their netlink attribute
validation function or manually reject them if they can't support this.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 373ff70..756a6bc 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -84,9 +84,21 @@ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
+static int dummy_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+	return 0;
+}
+
 static struct rtnl_link_ops dummy_link_ops __read_mostly = {
 	.kind		= "dummy",
 	.setup		= dummy_setup,
+	.validate	= dummy_validate,
 };
 
 /* Number of dummy devices to be set up by this module. */
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index c8e7c8f..f5c3598 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -221,10 +221,22 @@ static int ifb_open(struct net_device *dev)
 	return 0;
 }
 
+static int ifb_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+	return 0;
+}
+
 static struct rtnl_link_ops ifb_link_ops __read_mostly = {
 	.kind		= "ifb",
 	.priv_size	= sizeof(struct ifb_private),
 	.setup		= ifb_setup,
+	.validate	= ifb_validate,
 };
 
 /* Number of ifb devices to be set up by this module. */
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 34c1d0b..abb9900 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -324,8 +324,10 @@ static int vlan_dev_init(struct net_device *dev)
 					  (1<<__LINK_STATE_DORMANT))) |
 		      (1<<__LINK_STATE_PRESENT);
 
-	memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len);
-	memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len);
+	if (is_zero_ether_addr(dev->dev_addr))
+		memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
+	if (is_zero_ether_addr(dev->broadcast))
+		memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
 
 	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
 		dev->hard_header     = real_dev->hard_header;
@@ -373,6 +375,9 @@ void vlan_setup(struct net_device *new_dev)
 	new_dev->set_multicast_list = vlan_dev_set_multicast_list;
 	new_dev->destructor = free_netdev;
 	new_dev->do_ioctl = vlan_dev_ioctl;
+
+	/* Clear the whole address: vlan_dev_init() tests for all-zero */
+	memset(new_dev->broadcast, 0, ETH_ALEN);
 }
 
 static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 844c7e4..6cdd1e0 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -41,6 +41,13 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	u16 id;
 	int err;
 
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+
 	if (!data)
 		return -EINVAL;
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7b6b163..864cbdf 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1032,8 +1032,7 @@ replay:
 
 		if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change)
 			return -EOPNOTSUPP;
-		if (tb[IFLA_ADDRESS] || tb[IFLA_BROADCAST] || tb[IFLA_MAP] ||
-		    tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
+		if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
 			return -EOPNOTSUPP;
 
 		if (!ops) {
@@ -1065,6 +1064,12 @@ replay:
 
 		if (tb[IFLA_MTU])
 			dev->mtu = nla_get_u32(tb[IFLA_MTU]);
+		if (tb[IFLA_ADDRESS])
+			memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
+			       nla_len(tb[IFLA_ADDRESS]));
+		if (tb[IFLA_BROADCAST])
+			memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
+			       nla_len(tb[IFLA_BROADCAST]));
 		if (tb[IFLA_TXQLEN])
 			dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
 		if (tb[IFLA_WEIGHT])
-- 
cgit v0.10.2


From 662ad4f8efd3ba2ed710d36003f968b500e6f123 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 11 Jul 2007 19:43:52 -0700
Subject: [TCP]: tcp probe wraparound handling and other changes

Switch from formatting messages in probe routine and copying with
kfifo, to using a small circular queue of information and formatting
on read.  This avoids wraparound issues with kfifo, and saves one
copy.

Also make sure to state correct license, rather than copying off some
other driver I started with.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index d9323df..86624fa 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -6,8 +6,7 @@
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * the Free Software Foundation; either version 2 of the License.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -25,23 +24,22 @@
 #include <linux/tcp.h>
 #include <linux/proc_fs.h>
 #include <linux/module.h>
-#include <linux/kfifo.h>
 #include <linux/ktime.h>
 #include <linux/time.h>
-#include <linux/vmalloc.h>
 
 #include <net/tcp.h>
 
 MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
 MODULE_DESCRIPTION("TCP cwnd snooper");
 MODULE_LICENSE("GPL");
+MODULE_VERSION("1.1");
 
 static int port __read_mostly = 0;
 MODULE_PARM_DESC(port, "Port to match (0=all)");
 module_param(port, int, 0);
 
-static int bufsize __read_mostly = 64*1024;
-MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
+static int bufsize __read_mostly = 4096;
+MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
 module_param(bufsize, int, 0);
 
 static int full __read_mostly;
@@ -50,39 +48,38 @@ module_param(full, int, 0);
 
 static const char procname[] = "tcpprobe";
 
-struct {
-	struct kfifo	*fifo;
+struct tcp_log {
+	ktime_t tstamp;
+	__be32	saddr, daddr;
+	__be16	sport, dport;
+	u16	length;
+	u32	snd_nxt;
+	u32	snd_una;
+	u32	snd_wnd;
+	u32	snd_cwnd;
+	u32	ssthresh;
+	u32	srtt;
+};
+
+static struct {
 	spinlock_t	lock;
 	wait_queue_head_t wait;
 	ktime_t		start;
 	u32		lastcwnd;
-} tcpw;
 
-/*
- * Print to log with timestamps.
- * FIXME: causes an extra copy
- */
-static void printl(const char *fmt, ...)
-	__attribute__ ((format (printf, 1, 2)));
+	unsigned long	head, tail;
+	struct tcp_log	*log;
+} tcp_probe;
+
 
-static void printl(const char *fmt, ...)
+static inline int tcp_probe_used(void)
 {
-	va_list args;
-	int len;
-	struct timespec tv;
-	char tbuf[256];
-
-	va_start(args, fmt);
-	/* want monotonic time since start of tcp_probe */
-	tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start));
-
-	len = sprintf(tbuf, "%lu.%09lu ",
-		      (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec);
-	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
-	va_end(args);
-
-	kfifo_put(tcpw.fifo, tbuf, len);
-	wake_up(&tcpw.wait);
+	return (tcp_probe.head - tcp_probe.tail) % bufsize;
+}
+
+static inline int tcp_probe_avail(void)
+{
+	return bufsize - tcp_probe_used();
 }
 
 /*
@@ -97,63 +94,117 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 	/* Only update if port matches */
 	if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port)
-	    && (full || tp->snd_cwnd != tcpw.lastcwnd)) {
-		printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u %u\n",
-		       NIPQUAD(inet->saddr), ntohs(inet->sport),
-		       NIPQUAD(inet->daddr), ntohs(inet->dport),
-		       skb->len, tp->snd_nxt, tp->snd_una,
-		       tp->snd_cwnd, tcp_current_ssthresh(sk),
-		       tp->snd_wnd, tp->srtt >> 3);
-		tcpw.lastcwnd = tp->snd_cwnd;
+	    && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
+		spin_lock(&tcp_probe.lock);
+		/* If log fills, just silently drop */
+		if (tcp_probe_avail() > 1) {
+			struct tcp_log *p = tcp_probe.log + tcp_probe.head;
+
+			p->tstamp = ktime_get();
+			p->saddr = inet->saddr;
+			p->sport = inet->sport;
+			p->daddr = inet->daddr;
+			p->dport = inet->dport;
+			p->length = skb->len;
+			p->snd_nxt = tp->snd_nxt;
+			p->snd_una = tp->snd_una;
+			p->snd_cwnd = tp->snd_cwnd;
+			p->snd_wnd = tp->snd_wnd;
+			p->ssthresh = tcp_current_ssthresh(sk);
+			p->srtt = tp->srtt >> 3;
+
+			tcp_probe.head = (tcp_probe.head + 1) % bufsize;
+		}
+		tcp_probe.lastcwnd = tp->snd_cwnd;
+		spin_unlock(&tcp_probe.lock);
+
+		wake_up(&tcp_probe.wait);
 	}
 
 	jprobe_return();
 	return 0;
 }
 
-static struct jprobe tcp_probe = {
+static struct jprobe tcp_jprobe = {
 	.kp = {
 		.symbol_name	= "tcp_rcv_established",
 	},
 	.entry	= JPROBE_ENTRY(jtcp_rcv_established),
 };
 
-
 static int tcpprobe_open(struct inode * inode, struct file * file)
 {
-	kfifo_reset(tcpw.fifo);
-	tcpw.start = ktime_get();
+	/* Reset (empty) log */
+	spin_lock_bh(&tcp_probe.lock);
+	tcp_probe.head = tcp_probe.tail = 0;
+	tcp_probe.start = ktime_get();
+	spin_unlock_bh(&tcp_probe.lock);
+
 	return 0;
 }
 
+/* Format the record at the log tail into tbuf; returns bytes written. */
+static int tcpprobe_sprint(char *tbuf, int n)
+{
+	const struct tcp_log *p
+		= tcp_probe.log + tcp_probe.tail % bufsize;
+	struct timespec tv
+		= ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
+
+	return scnprintf(tbuf, n,
+			"%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u"
+			" %d %#x %#x %u %u %u %u\n",
+			(unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec,
+			NIPQUAD(p->saddr), ntohs(p->sport),
+			NIPQUAD(p->daddr), ntohs(p->dport),
+			p->length, p->snd_nxt, p->snd_una,
+			p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt);
+}
+
 static ssize_t tcpprobe_read(struct file *file, char __user *buf,
 			     size_t len, loff_t *ppos)
 {
 	int error = 0, cnt = 0;
-	unsigned char *tbuf;
 
 	if (!buf || len < 0)
 		return -EINVAL;
 
-	if (len == 0)
-		return 0;
+	while (cnt < len) {
+		char tbuf[128];
+		int width;
+
+		/* Wait for data in buffer */
+		error = wait_event_interruptible(tcp_probe.wait,
+						 tcp_probe_used() > 0);
+		if (error)
+			break;
 
-	tbuf = vmalloc(len);
-	if (!tbuf)
-		return -ENOMEM;
+		spin_lock_bh(&tcp_probe.lock);
+		if (tcp_probe.head == tcp_probe.tail) {
+			/* multiple readers race? */
+			spin_unlock_bh(&tcp_probe.lock);
+			continue;
+		}
 
-	error = wait_event_interruptible(tcpw.wait,
-					 __kfifo_len(tcpw.fifo) != 0);
-	if (error)
-		goto out_free;
+		width = tcpprobe_sprint(tbuf, sizeof(tbuf));
 
-	cnt = kfifo_get(tcpw.fifo, tbuf, len);
-	error = copy_to_user(buf, tbuf, cnt);
+		if (cnt + width < len)
+			tcp_probe.tail = (tcp_probe.tail + 1) % bufsize;
 
-out_free:
-	vfree(tbuf);
+		spin_unlock_bh(&tcp_probe.lock);
+
+		/* if record greater than space remaining in the user
+		   buffer, return partial buffer (so far) */
+		if (cnt + width >= len)
+			break;
+
+		/* copy_to_user() returns bytes NOT copied, not an errno */
+		if (copy_to_user(buf + cnt, tbuf, width))
+			return -EFAULT;
+		cnt += width;
+	}
 
-	return error ? error : cnt;
+	return cnt == 0 ? error : cnt;
 }
 
 static const struct file_operations tcpprobe_fops = {
@@ -166,34 +217,37 @@ static __init int tcpprobe_init(void)
 {
 	int ret = -ENOMEM;
 
-	init_waitqueue_head(&tcpw.wait);
-	spin_lock_init(&tcpw.lock);
-	tcpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &tcpw.lock);
-	if (IS_ERR(tcpw.fifo))
-		return PTR_ERR(tcpw.fifo);
+	init_waitqueue_head(&tcp_probe.wait);
+	spin_lock_init(&tcp_probe.lock);
+
+	/* bufsize is used as a modulus, so it must be strictly positive */
+	if (bufsize <= 0)
+		return -EINVAL;
+	tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL);
+	if (!tcp_probe.log)
+		goto err0;
 
 	if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
 		goto err0;
 
-	ret = register_jprobe(&tcp_probe);
+	ret = register_jprobe(&tcp_jprobe);
 	if (ret)
 		goto err1;
 
-	pr_info("TCP watch registered (port=%d)\n", port);
+	pr_info("TCP probe registered (port=%d)\n", port);
 	return 0;
  err1:
 	proc_net_remove(procname);
  err0:
-	kfifo_free(tcpw.fifo);
+	kfree(tcp_probe.log);
 	return ret;
 }
 module_init(tcpprobe_init);
 
 static __exit void tcpprobe_exit(void)
 {
-	kfifo_free(tcpw.fifo);
 	proc_net_remove(procname);
-	unregister_jprobe(&tcp_probe);
-
+	unregister_jprobe(&tcp_jprobe);
+	kfree(tcp_probe.log);
 }
 module_exit(tcpprobe_exit);
-- 
cgit v0.10.2


From db3d99c090e0cdb34b1274767e062bfddbb384bc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 11 Jul 2007 19:46:26 -0700
Subject: [NET_SCHED]: ematch: module autoloading

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index c3f01b3..30b8571 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -403,16 +403,13 @@ enum
  *   1..32767		Reserved for ematches inside kernel tree
  *   32768..65535	Free to use, not reliable
  */
-enum
-{
-	TCF_EM_CONTAINER,
-	TCF_EM_CMP,
-	TCF_EM_NBYTE,
-	TCF_EM_U32,
-	TCF_EM_META,
-	TCF_EM_TEXT,
-	__TCF_EM_MAX
-};
+#define	TCF_EM_CONTAINER	0
+#define	TCF_EM_CMP		1
+#define	TCF_EM_NBYTE		2
+#define	TCF_EM_U32		3
+#define	TCF_EM_META		4
+#define	TCF_EM_TEXT		5
+#define	TCF_EM_MAX		5
 
 enum
 {
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4129df7..6c29920 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -306,6 +306,8 @@ static inline int tcf_em_tree_match(struct sk_buff *skb,
 		return 1;
 }
 
+#define MODULE_ALIAS_TCF_EMATCH(kind)	MODULE_ALIAS("ematch-kind-" __stringify(kind))
+
 #else /* CONFIG_NET_EMATCH */
 
 struct tcf_ematch_tree
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index 8d6dacd..cc49c93 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -98,3 +98,4 @@ MODULE_LICENSE("GPL");
 module_init(init_em_cmp);
 module_exit(exit_em_cmp);
 
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_CMP);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 60acf8c..650f09c 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -848,3 +848,5 @@ MODULE_LICENSE("GPL");
 
 module_init(init_em_meta);
 module_exit(exit_em_meta);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_META);
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index b4b36ef..370a1b2 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -76,3 +76,5 @@ MODULE_LICENSE("GPL");
 
 module_init(init_em_nbyte);
 module_exit(exit_em_nbyte);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_NBYTE);
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index e8f4616..d5cd86e 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -150,3 +150,5 @@ MODULE_LICENSE("GPL");
 
 module_init(init_em_text);
 module_exit(exit_em_text);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_TEXT);
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 0a2a7fe..112796e 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -60,3 +60,5 @@ MODULE_LICENSE("GPL");
 
 module_init(init_em_u32);
 module_exit(exit_em_u32);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_U32);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 2483739..f3a104e 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -222,6 +222,19 @@ static int tcf_em_validate(struct tcf_proto *tp,
 
 		if (em->ops == NULL) {
 			err = -ENOENT;
+#ifdef CONFIG_KMOD
+			__rtnl_unlock();
+			request_module("ematch-kind-%u", em_hdr->kind);
+			rtnl_lock();
+			em->ops = tcf_em_lookup(em_hdr->kind);
+			if (em->ops) {
+				/* We dropped the RTNL mutex in order to
+				 * perform the module load. Tell the caller
+				 * to replay the request. */
+				module_put(em->ops->owner);
+				err = -EAGAIN;
+			}
+#endif
 			goto errout;
 		}
 
-- 
cgit v0.10.2


From 469665459d26da8d0b46c70d070da1e192e48e46 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Wed, 11 Jul 2007 19:47:19 -0700
Subject: [TG3]: Fix irq_sync race condition.

Gagan Arneja <gaagaan@gmail.com> pointed out that tg3_reset_task()
could potentially race with another thread calling tg3_full_lock()
such as the ethtool_set_xxx() functions.  This may trigger the
BUG_ON() in tg3_irq_quiesce() or cause the irq_sync flag to be
out-of-sync.

I think the easiest way to fix this is to get the tp->lock first
before setting the irq_sync flag.  This is safe to do because the
tp->lock is never grabbed by the irq handler.  This change will
guarantee that the irq_sync flag updates will be serialized.  We also
have to change one spot to call tg3_netif_start() (which clears the
irq_sync flag) before releasing the tp->lock.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 3a43426..314f5cf 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3512,9 +3512,9 @@ static inline int tg3_irq_sync(struct tg3 *tp)
  */
 static inline void tg3_full_lock(struct tg3 *tp, int irq_sync)
 {
+	spin_lock_bh(&tp->lock);
 	if (irq_sync)
 		tg3_irq_quiesce(tp);
-	spin_lock_bh(&tp->lock);
 }
 
 static inline void tg3_full_unlock(struct tg3 *tp)
@@ -9116,10 +9116,10 @@ static void tg3_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
 	/* Update RX_MODE_KEEP_VLAN_TAG bit in RX_MODE register. */
 	__tg3_set_rx_mode(dev);
 
-	tg3_full_unlock(tp);
-
 	if (netif_running(dev))
 		tg3_netif_start(tp);
+
+	tg3_full_unlock(tp);
 }
 #endif
 
-- 
cgit v0.10.2


From e8f3f6cad7e423253090887bc4afe7bc844162da Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Wed, 11 Jul 2007 19:47:55 -0700
Subject: [TG3]: Fix the polarity bit.

For most pre-5705 devices, multiple link interrupts were being generated
for a single physical link change.  The source of the interrupts was
determined to be unnecessary toggling of the MAC link polarity bit.

This patch changes the way the link polarity bit gets configured.  Where
possible, code that dynamically configures the bit in response to link
changes has been replaced by code that configures the bit once during
initialization time and then leaves the bit alone.

For correctness, this patch also limits the use of the bit to those
devices where it is defined, namely devices before the 5705.  This patch
also corrects the link polarity configurations for 5700 devices when
paired against a bcm5411 phy.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 314f5cf..1df129a 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1162,6 +1162,19 @@ static void tg3_frob_aux_power(struct tg3 *tp)
 	}
 }
 
+static int tg3_5700_link_polarity(struct tg3 *tp, u32 speed)
+{
+	if (tp->led_ctrl == LED_CTRL_MODE_PHY_2)
+		return 1;
+	else if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5411) {
+		if (speed != SPEED_10)
+			return 1;
+	} else if (speed == SPEED_10)
+		return 1;
+
+	return 0;
+}
+
 static int tg3_setup_phy(struct tg3 *, int);
 
 #define RESET_KIND_SHUTDOWN	0
@@ -1320,9 +1333,17 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state)
 			else
 				mac_mode = MAC_MODE_PORT_MODE_MII;
 
-			if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 ||
-			    !(tp->tg3_flags & TG3_FLAG_WOL_SPEED_100MB))
-				mac_mode |= MAC_MODE_LINK_POLARITY;
+			mac_mode |= tp->mac_mode & MAC_MODE_LINK_POLARITY;
+			if (GET_ASIC_REV(tp->pci_chip_rev_id) ==
+			    ASIC_REV_5700) {
+				u32 speed = (tp->tg3_flags &
+					     TG3_FLAG_WOL_SPEED_100MB) ?
+					     SPEED_100 : SPEED_10;
+				if (tg3_5700_link_polarity(tp, speed))
+					mac_mode |= MAC_MODE_LINK_POLARITY;
+				else
+					mac_mode &= ~MAC_MODE_LINK_POLARITY;
+			}
 		} else {
 			mac_mode = MAC_MODE_PORT_MODE_TBI;
 		}
@@ -1990,15 +2011,12 @@ relink:
 	if (tp->link_config.active_duplex == DUPLEX_HALF)
 		tp->mac_mode |= MAC_MODE_HALF_DUPLEX;
 
-	tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700) {
-		if ((tp->led_ctrl == LED_CTRL_MODE_PHY_2) ||
-		    (current_link_up == 1 &&
-		     tp->link_config.active_speed == SPEED_10))
-			tp->mac_mode |= MAC_MODE_LINK_POLARITY;
-	} else {
-		if (current_link_up == 1)
+		if (current_link_up == 1 &&
+		    tg3_5700_link_polarity(tp, tp->link_config.active_speed))
 			tp->mac_mode |= MAC_MODE_LINK_POLARITY;
+		else
+			tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
 	}
 
 	/* ??? Without this setting Netgear GA302T PHY does not
@@ -2639,6 +2657,9 @@ static int tg3_setup_fiber_by_hand(struct tg3 *tp, u32 mac_status)
 
 		tw32_f(MAC_MODE, (tp->mac_mode | MAC_MODE_SEND_CONFIGS));
 		udelay(40);
+
+		tw32_f(MAC_MODE, tp->mac_mode);
+		udelay(40);
 	}
 
 out:
@@ -2698,10 +2719,6 @@ static int tg3_setup_fiber_phy(struct tg3 *tp, int force_reset)
 	else
 		current_link_up = tg3_setup_fiber_by_hand(tp, mac_status);
 
-	tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
-	tw32_f(MAC_MODE, tp->mac_mode);
-	udelay(40);
-
 	tp->hw_status->status =
 		(SD_STATUS_UPDATED |
 		 (tp->hw_status->status & ~SD_STATUS_LINK_CHG));
@@ -6444,6 +6461,10 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 
 	tp->mac_mode = MAC_MODE_TXSTAT_ENABLE | MAC_MODE_RXSTAT_ENABLE |
 		MAC_MODE_TDE_ENABLE | MAC_MODE_RDE_ENABLE | MAC_MODE_FHDE_ENABLE;
+	if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS) &&
+	    !(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) &&
+	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700)
+		tp->mac_mode |= MAC_MODE_LINK_POLARITY;
 	tw32_f(MAC_MODE, tp->mac_mode | MAC_MODE_RXSTAT_CLEAR | MAC_MODE_TXSTAT_CLEAR);
 	udelay(40);
 
@@ -8805,7 +8826,9 @@ static int tg3_run_loopback(struct tg3 *tp, int loopback_mode)
 			return 0;
 
 		mac_mode = (tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK) |
-			   MAC_MODE_PORT_INT_LPBACK | MAC_MODE_LINK_POLARITY;
+			   MAC_MODE_PORT_INT_LPBACK;
+		if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS))
+			mac_mode |= MAC_MODE_LINK_POLARITY;
 		if (tp->tg3_flags & TG3_FLAG_10_100_ONLY)
 			mac_mode |= MAC_MODE_PORT_MODE_MII;
 		else
@@ -8835,8 +8858,7 @@ static int tg3_run_loopback(struct tg3 *tp, int loopback_mode)
 		tg3_writephy(tp, MII_BMCR, val);
 		udelay(40);
 
-		mac_mode = (tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK) |
-			   MAC_MODE_LINK_POLARITY;
+		mac_mode = tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK;
 		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
 			tg3_writephy(tp, MII_TG3_EPHY_PTEST, 0x1800);
 			mac_mode |= MAC_MODE_PORT_MODE_MII;
@@ -8849,8 +8871,11 @@ static int tg3_run_loopback(struct tg3 *tp, int loopback_mode)
 			udelay(10);
 			tw32_f(MAC_RX_MODE, tp->rx_mode);
 		}
-		if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) {
-			mac_mode &= ~MAC_MODE_LINK_POLARITY;
+		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700) {
+			if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401)
+				mac_mode &= ~MAC_MODE_LINK_POLARITY;
+			else if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5411)
+				mac_mode |= MAC_MODE_LINK_POLARITY;
 			tg3_writephy(tp, MII_TG3_EXT_CTRL,
 				     MII_TG3_EXT_CTRL_LNK3_LED_MODE);
 		}
-- 
cgit v0.10.2


From 9ef8ca99749784644602535691f8cf201ee2a225 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Wed, 11 Jul 2007 19:48:29 -0700
Subject: [TG3]: Enable auto MDI.

This patch adds automatic MDI crossover support when autonegotiation is
turned off.  Automatic MDI crossover allows link to be established
without the use of a crossover cable.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 1df129a..4f59e5c 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -721,6 +721,44 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
 	return ret;
 }
 
+static void tg3_phy_toggle_automdix(struct tg3 *tp, int enable)
+{
+	u32 phy;
+
+	if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS) ||
+	    (tp->tg3_flags2 & TG3_FLG2_ANY_SERDES))
+		return;
+
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
+		u32 ephy;
+
+		if (!tg3_readphy(tp, MII_TG3_EPHY_TEST, &ephy)) {
+			tg3_writephy(tp, MII_TG3_EPHY_TEST,
+				     ephy | MII_TG3_EPHY_SHADOW_EN);
+			if (!tg3_readphy(tp, MII_TG3_EPHYTST_MISCCTRL, &phy)) {
+				if (enable)
+					phy |= MII_TG3_EPHYTST_MISCCTRL_MDIX;
+				else
+					phy &= ~MII_TG3_EPHYTST_MISCCTRL_MDIX;
+				tg3_writephy(tp, MII_TG3_EPHYTST_MISCCTRL, phy);
+			}
+			tg3_writephy(tp, MII_TG3_EPHY_TEST, ephy);
+		}
+	} else {
+		phy = MII_TG3_AUXCTL_MISC_RDSEL_MISC |
+		      MII_TG3_AUXCTL_SHDWSEL_MISC;
+		if (!tg3_writephy(tp, MII_TG3_AUX_CTRL, phy) &&
+		    !tg3_readphy(tp, MII_TG3_AUX_CTRL, &phy)) {
+			if (enable)
+				phy |= MII_TG3_AUXCTL_MISC_FORCE_AMDIX;
+			else
+				phy &= ~MII_TG3_AUXCTL_MISC_FORCE_AMDIX;
+			phy |= MII_TG3_AUXCTL_MISC_WREN;
+			tg3_writephy(tp, MII_TG3_AUX_CTRL, phy);
+		}
+	}
+}
+
 static void tg3_phy_set_wirespeed(struct tg3 *tp)
 {
 	u32 val;
@@ -1045,23 +1083,11 @@ out:
 	}
 
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
-		u32 phy_reg;
-
 		/* adjust output voltage */
 		tg3_writephy(tp, MII_TG3_EPHY_PTEST, 0x12);
-
-		if (!tg3_readphy(tp, MII_TG3_EPHY_TEST, &phy_reg)) {
-			u32 phy_reg2;
-
-			tg3_writephy(tp, MII_TG3_EPHY_TEST,
-				     phy_reg | MII_TG3_EPHY_SHADOW_EN);
-			/* Enable auto-MDIX */
-			if (!tg3_readphy(tp, 0x10, &phy_reg2))
-				tg3_writephy(tp, 0x10, phy_reg2 | 0x4000);
-			tg3_writephy(tp, MII_TG3_EPHY_TEST, phy_reg);
-		}
 	}
 
+	tg3_phy_toggle_automdix(tp, 1);
 	tg3_phy_set_wirespeed(tp);
 	return 0;
 }
@@ -8847,14 +8873,14 @@ static int tg3_run_loopback(struct tg3 *tp, int loopback_mode)
 					     phytest | MII_TG3_EPHY_SHADOW_EN);
 				if (!tg3_readphy(tp, 0x1b, &phy))
 					tg3_writephy(tp, 0x1b, phy & ~0x20);
-				if (!tg3_readphy(tp, 0x10, &phy))
-					tg3_writephy(tp, 0x10, phy & ~0x4000);
 				tg3_writephy(tp, MII_TG3_EPHY_TEST, phytest);
 			}
 			val = BMCR_LOOPBACK | BMCR_FULLDPLX | BMCR_SPEED100;
 		} else
 			val = BMCR_LOOPBACK | BMCR_FULLDPLX | BMCR_SPEED1000;
 
+		tg3_phy_toggle_automdix(tp, 0);
+
 		tg3_writephy(tp, MII_BMCR, val);
 		udelay(40);
 
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index bd9f4f4..e1b9381 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -1642,6 +1642,11 @@
 
 #define MII_TG3_AUX_CTRL		0x18 /* auxilliary control register */
 
+#define MII_TG3_AUXCTL_MISC_WREN	0x8000
+#define MII_TG3_AUXCTL_MISC_FORCE_AMDIX	0x0200
+#define MII_TG3_AUXCTL_MISC_RDSEL_MISC	0x7000
+#define MII_TG3_AUXCTL_SHDWSEL_MISC		0x0007
+
 #define MII_TG3_AUX_STAT		0x19 /* auxilliary status register */
 #define MII_TG3_AUX_STAT_LPASS		0x0004
 #define MII_TG3_AUX_STAT_SPDMASK	0x0700
@@ -1667,6 +1672,9 @@
 #define MII_TG3_EPHY_TEST		0x1f /* 5906 PHY register */
 #define MII_TG3_EPHY_SHADOW_EN		0x80
 
+#define MII_TG3_EPHYTST_MISCCTRL	0x10 /* 5906 EPHY misc ctrl shadow register */
+#define MII_TG3_EPHYTST_MISCCTRL_MDIX	0x4000
+
 #define MII_TG3_TEST1			0x1e
 #define MII_TG3_TEST1_TRIM_EN		0x0010
 #define MII_TG3_TEST1_CRC_EN		0x8000
-- 
cgit v0.10.2


From 70b65a2d628d2e66bbf044bb764be64949f3580c Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Wed, 11 Jul 2007 19:48:50 -0700
Subject: [TG3]: Add missing NVRAM strapping.

This patch adds a missing NVRAM strapping for 5755 devices.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 4f59e5c..0d40f9d 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -9461,11 +9461,13 @@ static void __devinit tg3_get_5755_nvram_info(struct tg3 *tp)
 		case FLASH_5755VENDOR_ATMEL_FLASH_1:
 		case FLASH_5755VENDOR_ATMEL_FLASH_2:
 		case FLASH_5755VENDOR_ATMEL_FLASH_3:
+		case FLASH_5755VENDOR_ATMEL_FLASH_5:
 			tp->nvram_jedecnum = JEDEC_ATMEL;
 			tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
 			tp->tg3_flags2 |= TG3_FLG2_FLASH;
 			tp->nvram_pagesize = 264;
-			if (nvcfg1 == FLASH_5755VENDOR_ATMEL_FLASH_1)
+			if (nvcfg1 == FLASH_5755VENDOR_ATMEL_FLASH_1 ||
+			    nvcfg1 == FLASH_5755VENDOR_ATMEL_FLASH_5)
 				tp->nvram_size = (protect ? 0x3e200 : 0x80000);
 			else if (nvcfg1 == FLASH_5755VENDOR_ATMEL_FLASH_2)
 				tp->nvram_size = (protect ? 0x1f200 : 0x40000);
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index e1b9381..d84e75e 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -1467,6 +1467,7 @@
 #define  FLASH_5755VENDOR_ATMEL_FLASH_2	 0x03400002
 #define  FLASH_5755VENDOR_ATMEL_FLASH_3	 0x03400000
 #define  FLASH_5755VENDOR_ATMEL_FLASH_4	 0x00000003
+#define  FLASH_5755VENDOR_ATMEL_FLASH_5	 0x02000003
 #define  FLASH_5755VENDOR_ATMEL_EEPROM_64KHZ	 0x03c00003
 #define  FLASH_5755VENDOR_ATMEL_EEPROM_376KHZ	 0x03c00002
 #define  FLASH_5787VENDOR_ATMEL_EEPROM_64KHZ	 0x03000003
-- 
cgit v0.10.2


From 15028aad00ddf241581fbe74a02ec89cbb28d35d Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Wed, 11 Jul 2007 19:49:22 -0700
Subject: [TG3]: Update version to 3.78.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 0d40f9d..3245f16 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -64,8 +64,8 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.77"
-#define DRV_MODULE_RELDATE	"May 31, 2007"
+#define DRV_MODULE_VERSION	"3.78"
+#define DRV_MODULE_RELDATE	"July 11, 2007"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
-- 
cgit v0.10.2